diff options
Diffstat (limited to 'llvm/test/CodeGen')
79 files changed, 24061 insertions, 9471 deletions
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index ac3c47c..200e9d1 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -395,6 +395,7 @@ # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_SADDSAT (opcode {{[0-9]+}}): 1 type index, 0 imm indices +# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_USUBSAT (opcode {{[0-9]+}}): 1 type index, 0 imm indices diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir b/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir index 499c08f..7921de6 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir @@ -15,7 +15,7 @@ define void @mul_wrong_pow_2(ptr %addr) { ret void } define void @more_than_one_use_shl_1(ptr %addr) { ret void } define void @more_than_one_use_shl_2(ptr %addr) { ret void } - define void @more_than_one_use_shl_lsl_fast(ptr %addr) #1 { ret void } + define void @more_than_one_use_shl_lsl_fast(ptr %addr) { ret void } define void @more_than_one_use_shl_lsl_slow(ptr %addr) { ret void } define void @more_than_one_use_shl_minsize(ptr %addr) #0 { ret void } define void @ldrwrox(ptr %addr) { ret void } @@ -24,7 +24,6 @@ define void @ldbbrox(ptr %addr) { ret void } define void @ldrqrox(ptr %addr) { ret void } attributes #0 = { optsize } - attributes #1 = { "target-features"="+addr-lsl-fast" } ... 
--- @@ -478,11 +477,10 @@ body: | ; CHECK: liveins: $x0, $x1, $x2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK-NEXT: [[ADDXrs:%[0-9]+]]:gpr64common = ADDXrs [[COPY1]], [[COPY]], 3 - ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrs]], 0 :: (load (s64) from %ir.addr) - ; CHECK-NEXT: [[LDRXui1:%[0-9]+]]:gpr64 = LDRXui [[ADDXrs]], 0 :: (load (s64) from %ir.addr) - ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXui]], [[LDRXui1]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 + ; CHECK-NEXT: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) + ; CHECK-NEXT: [[LDRXroX1:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) + ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXroX]], [[LDRXroX1]] ; CHECK-NEXT: $x2 = COPY [[ADDXrr]] ; CHECK-NEXT: RET_ReallyLR implicit $x2 %0:gpr(s64) = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll index 59cd87f..022aaea 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK0 -; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+addr-lsl-fast | FileCheck %s --check-prefixes=CHECK,CHECK3 +; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+addr-lsl-slow-14 | FileCheck %s --check-prefixes=CHECK,CHECK0 +; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK3 %struct.a = type [256 x i16] %struct.b = type [256 x i32] @@ -49,36 +49,20 @@ define i16 @halfword(ptr %ctx, i32 %xor72) nounwind { } define i32 @word(ptr %ctx, i32 %xor72) nounwind { -; CHECK0-LABEL: word: -; CHECK0: // %bb.0: -; CHECK0-NEXT: stp x30, x21, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK0-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK0-NEXT: ubfx x8, x1, #9, #8 -; CHECK0-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill -; CHECK0-NEXT: mov x19, x0 -; CHECK0-NEXT: lsl x21, x8, #2 -; CHECK0-NEXT: ldr w20, [x0, x21] -; CHECK0-NEXT: bl foo -; CHECK0-NEXT: mov w0, w20 -; CHECK0-NEXT: str w20, [x19, x21] -; CHECK0-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload -; CHECK0-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload -; CHECK0-NEXT: ret -; -; CHECK3-LABEL: word: -; CHECK3: // %bb.0: -; CHECK3-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill -; CHECK3-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK3-NEXT: ubfx x21, x1, #9, #8 -; CHECK3-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill -; CHECK3-NEXT: mov x19, x0 -; CHECK3-NEXT: ldr w20, [x0, x21, lsl #2] -; CHECK3-NEXT: bl foo -; CHECK3-NEXT: mov w0, w20 -; CHECK3-NEXT: str w20, [x19, x21, lsl #2] -; CHECK3-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload -; CHECK3-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload -; CHECK3-NEXT: ret +; CHECK-LABEL: word: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ubfx x21, x1, #9, #8 +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: ldr w20, [x0, x21, lsl #2] +; CHECK-NEXT: bl foo +; CHECK-NEXT: mov w0, w20 +; CHECK-NEXT: str w20, [x19, x21, lsl #2] +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret %shr81 = lshr i32 %xor72, 9 %conv82 = zext i32 %shr81 to i64 %idxprom83 = and i64 %conv82, 255 @@ -90,36 +74,20 @@ define i32 @word(ptr %ctx, i32 %xor72) nounwind { } define i64 @doubleword(ptr %ctx, i32 %xor72) nounwind { -; CHECK0-LABEL: doubleword: -; CHECK0: // %bb.0: -; CHECK0-NEXT: stp x30, x21, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK0-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK0-NEXT: ubfx x8, x1, #9, #8 -; CHECK0-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill -; CHECK0-NEXT: mov x19, x0 -; CHECK0-NEXT: lsl x21, x8, #3 -; CHECK0-NEXT: ldr x20, [x0, x21] -; CHECK0-NEXT: bl foo -; CHECK0-NEXT: mov x0, x20 -; CHECK0-NEXT: str x20, [x19, x21] -; CHECK0-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload -; CHECK0-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload -; CHECK0-NEXT: ret -; -; CHECK3-LABEL: doubleword: -; CHECK3: // %bb.0: -; CHECK3-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill -; CHECK3-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK3-NEXT: ubfx x21, x1, #9, #8 -; CHECK3-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill -; CHECK3-NEXT: mov x19, x0 -; CHECK3-NEXT: ldr x20, [x0, x21, lsl #3] -; CHECK3-NEXT: bl foo -; CHECK3-NEXT: mov x0, x20 -; CHECK3-NEXT: str x20, [x19, x21, lsl #3] -; CHECK3-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload -; CHECK3-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload -; CHECK3-NEXT: ret +; CHECK-LABEL: doubleword: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x30, x21, [sp, #-32]! 
// 16-byte Folded Spill +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ubfx x21, x1, #9, #8 +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: ldr x20, [x0, x21, lsl #3] +; CHECK-NEXT: bl foo +; CHECK-NEXT: mov x0, x20 +; CHECK-NEXT: str x20, [x19, x21, lsl #3] +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret %shr81 = lshr i32 %xor72, 9 %conv82 = zext i32 %shr81 to i64 %idxprom83 = and i64 %conv82, 255 @@ -163,20 +131,12 @@ endbb: } define i64 @gep3(ptr %p, i64 %b) { -; CHECK0-LABEL: gep3: -; CHECK0: // %bb.0: -; CHECK0-NEXT: lsl x9, x1, #3 -; CHECK0-NEXT: mov x8, x0 -; CHECK0-NEXT: ldr x0, [x0, x9] -; CHECK0-NEXT: str x1, [x8, x9] -; CHECK0-NEXT: ret -; -; CHECK3-LABEL: gep3: -; CHECK3: // %bb.0: -; CHECK3-NEXT: mov x8, x0 -; CHECK3-NEXT: ldr x0, [x0, x1, lsl #3] -; CHECK3-NEXT: str x1, [x8, x1, lsl #3] -; CHECK3-NEXT: ret +; CHECK-LABEL: gep3: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, x0 +; CHECK-NEXT: ldr x0, [x0, x1, lsl #3] +; CHECK-NEXT: str x1, [x8, x1, lsl #3] +; CHECK-NEXT: ret %g = getelementptr inbounds i64, ptr %p, i64 %b %l = load i64, ptr %g store i64 %b, ptr %g diff --git a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll index 573f921..e31c9a0 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll @@ -134,9 +134,8 @@ define void @test8(i64 %a, ptr noalias %src, ptr noalias %dst, i64 %n) { ; CHECK-NEXT: b.hs .LBB7_1 ; CHECK-NEXT: // %bb.3: // %if.then ; CHECK-NEXT: // in Loop: Header=BB7_2 Depth=1 -; CHECK-NEXT: lsl x10, x8, #3 -; CHECK-NEXT: ldr x11, [x1, x10] -; CHECK-NEXT: str x11, [x2, x10] +; CHECK-NEXT: ldr x10, [x1, x8, lsl #3] +; CHECK-NEXT: str x10, [x2, x8, lsl #3] ; CHECK-NEXT: b .LBB7_1 ; 
CHECK-NEXT: .LBB7_4: // %exit ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-addr-mode-folding.ll b/llvm/test/CodeGen/AArch64/arm64-addr-mode-folding.ll index d593272..6bcd2f0 100644 --- a/llvm/test/CodeGen/AArch64/arm64-addr-mode-folding.ll +++ b/llvm/test/CodeGen/AArch64/arm64-addr-mode-folding.ll @@ -125,7 +125,7 @@ return: ; preds = %if.end23, %if.then3 } ; CHECK: @test -; CHECK-NOT: , uxtw #2] +; CHECK: , uxtw #2] define i32 @test(ptr %array, i8 zeroext %c, i32 %arg) { entry: %conv = zext i8 %c to i32 diff --git a/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll index 3542b26..5b055a4 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll @@ -201,11 +201,10 @@ define void @fct1_64x1(ptr nocapture %array, i64 %offset) nounwind ssp { ; CHECK-LABEL: fct1_64x1: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, :got:globalArray64x1 -; CHECK-NEXT: lsl x9, x1, #3 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray64x1] -; CHECK-NEXT: ldr d0, [x0, x9] +; CHECK-NEXT: ldr d0, [x0, x1, lsl #3] ; CHECK-NEXT: ldr x8, [x8] -; CHECK-NEXT: str d0, [x8, x9] +; CHECK-NEXT: str d0, [x8, x1, lsl #3] ; CHECK-NEXT: ret entry: %arrayidx = getelementptr inbounds <1 x i64>, ptr %array, i64 %offset @@ -238,11 +237,10 @@ define void @fct1_32x2(ptr nocapture %array, i64 %offset) nounwind ssp { ; CHECK-LABEL: fct1_32x2: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, :got:globalArray32x2 -; CHECK-NEXT: lsl x9, x1, #3 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray32x2] -; CHECK-NEXT: ldr d0, [x0, x9] +; CHECK-NEXT: ldr d0, [x0, x1, lsl #3] ; CHECK-NEXT: ldr x8, [x8] -; CHECK-NEXT: str d0, [x8, x9] +; CHECK-NEXT: str d0, [x8, x1, lsl #3] ; CHECK-NEXT: ret entry: %arrayidx = getelementptr inbounds <2 x i32>, ptr %array, i64 %offset @@ -275,11 +273,10 @@ define void @fct1_16x4(ptr nocapture %array, i64 %offset) nounwind ssp { ; CHECK-LABEL: fct1_16x4: ; CHECK: // 
%bb.0: // %entry ; CHECK-NEXT: adrp x8, :got:globalArray16x4 -; CHECK-NEXT: lsl x9, x1, #3 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray16x4] -; CHECK-NEXT: ldr d0, [x0, x9] +; CHECK-NEXT: ldr d0, [x0, x1, lsl #3] ; CHECK-NEXT: ldr x8, [x8] -; CHECK-NEXT: str d0, [x8, x9] +; CHECK-NEXT: str d0, [x8, x1, lsl #3] ; CHECK-NEXT: ret entry: %arrayidx = getelementptr inbounds <4 x i16>, ptr %array, i64 %offset @@ -312,11 +309,10 @@ define void @fct1_8x8(ptr nocapture %array, i64 %offset) nounwind ssp { ; CHECK-LABEL: fct1_8x8: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, :got:globalArray8x8 -; CHECK-NEXT: lsl x9, x1, #3 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray8x8] -; CHECK-NEXT: ldr d0, [x0, x9] +; CHECK-NEXT: ldr d0, [x0, x1, lsl #3] ; CHECK-NEXT: ldr x8, [x8] -; CHECK-NEXT: str d0, [x8, x9] +; CHECK-NEXT: str d0, [x8, x1, lsl #3] ; CHECK-NEXT: ret entry: %arrayidx = getelementptr inbounds <8 x i8>, ptr %array, i64 %offset diff --git a/llvm/test/CodeGen/AArch64/avoid-free-ext-promotion.ll b/llvm/test/CodeGen/AArch64/avoid-free-ext-promotion.ll index 8f19553..634d1b9 100644 --- a/llvm/test/CodeGen/AArch64/avoid-free-ext-promotion.ll +++ b/llvm/test/CodeGen/AArch64/avoid-free-ext-promotion.ll @@ -82,13 +82,12 @@ define void @avoid_promotion_2_and(ptr nocapture noundef %arg) { ; CHECK-NEXT: eor w10, w10, w11 ; CHECK-NEXT: ldur w11, [x8, #-24] ; CHECK-NEXT: and w10, w10, w14 -; CHECK-NEXT: ldp x15, x14, [x8, #-16] -; CHECK-NEXT: ubfiz x13, x10, #1, #32 +; CHECK-NEXT: ldp x14, x13, [x8, #-16] ; CHECK-NEXT: str w10, [x8] -; CHECK-NEXT: and w10, w11, w12 -; CHECK-NEXT: ldrh w11, [x14, x13] -; CHECK-NEXT: strh w11, [x15, w10, uxtw #1] -; CHECK-NEXT: strh w12, [x14, x13] +; CHECK-NEXT: and w11, w11, w12 +; CHECK-NEXT: ldrh w15, [x13, w10, uxtw #1] +; CHECK-NEXT: strh w15, [x14, w11, uxtw #1] +; CHECK-NEXT: strh w12, [x13, w10, uxtw #1] ; CHECK-NEXT: b LBB1_1 ; CHECK-NEXT: LBB1_4: ; %exit ; CHECK-NEXT: ret diff --git 
a/llvm/test/CodeGen/AArch64/cheap-as-a-move.ll b/llvm/test/CodeGen/AArch64/cheap-as-a-move.ll index b5c2104..50c70c5 100644 --- a/llvm/test/CodeGen/AArch64/cheap-as-a-move.ll +++ b/llvm/test/CodeGen/AArch64/cheap-as-a-move.ll @@ -7,7 +7,7 @@ target triple = "aarch64-unknown-linux" define void @f0(ptr %a, i64 %n) { ; CHECK-LABEL: f0: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill ; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 48 @@ -15,7 +15,6 @@ define void @f0(ptr %a, i64 %n) { ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w21, -24 ; CHECK-NEXT: .cfi_offset w22, -32 -; CHECK-NEXT: .cfi_offset w23, -40 ; CHECK-NEXT: .cfi_offset w30, -48 ; CHECK-NEXT: mov x21, #1 // =0x1 ; CHECK-NEXT: mov x19, x1 @@ -27,18 +26,17 @@ define void @f0(ptr %a, i64 %n) { ; CHECK-NEXT: b.ge .LBB0_2 ; CHECK-NEXT: .LBB0_1: // %loop.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lsl x23, x22, #2 +; CHECK-NEXT: ldr w0, [x20, x22, lsl #2] ; CHECK-NEXT: mov x1, x21 -; CHECK-NEXT: ldr w0, [x20, x23] ; CHECK-NEXT: bl g -; CHECK-NEXT: str w0, [x20, x23] +; CHECK-NEXT: str w0, [x20, x22, lsl #2] ; CHECK-NEXT: add x22, x22, #1 ; CHECK-NEXT: cmp x22, x19 ; CHECK-NEXT: b.lt .LBB0_1 ; CHECK-NEXT: .LBB0_2: // %exit ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp x30, x23, [sp], #48 // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: br label %loop @@ -64,15 +62,13 @@ exit: define void @f1(ptr %a, i64 %n) { ; CHECK-LABEL: f1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x30, [sp, #-48]! 
// 8-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w21, -24 -; CHECK-NEXT: .cfi_offset w22, -32 -; CHECK-NEXT: .cfi_offset w30, -48 +; CHECK-NEXT: .cfi_offset w30, -32 ; CHECK-NEXT: mov x19, x1 ; CHECK-NEXT: mov x20, x0 ; CHECK-NEXT: mov x21, xzr @@ -80,19 +76,17 @@ define void @f1(ptr %a, i64 %n) { ; CHECK-NEXT: b.ge .LBB1_2 ; CHECK-NEXT: .LBB1_1: // %loop.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lsl x22, x21, #2 +; CHECK-NEXT: ldr w0, [x20, x21, lsl #2] ; CHECK-NEXT: mov x1, #1450704896 // =0x56780000 ; CHECK-NEXT: movk x1, #4660, lsl #48 -; CHECK-NEXT: ldr w0, [x20, x22] ; CHECK-NEXT: bl g -; CHECK-NEXT: str w0, [x20, x22] +; CHECK-NEXT: str w0, [x20, x21, lsl #2] ; CHECK-NEXT: add x21, x21, #1 ; CHECK-NEXT: cmp x21, x19 ; CHECK-NEXT: b.lt .LBB1_1 ; CHECK-NEXT: .LBB1_2: // %exit -; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: ret entry: br label %loop diff --git a/llvm/test/CodeGen/AArch64/extract-bits.ll b/llvm/test/CodeGen/AArch64/extract-bits.ll index d4ea143..b87157a 100644 --- a/llvm/test/CodeGen/AArch64/extract-bits.ll +++ b/llvm/test/CodeGen/AArch64/extract-bits.ll @@ -972,10 +972,9 @@ define void @pr38938(ptr %a0, ptr %a1) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr x8, [x1] ; CHECK-NEXT: ubfx x8, x8, #21, #10 -; CHECK-NEXT: lsl x8, x8, #2 -; CHECK-NEXT: 
ldr w9, [x0, x8] +; CHECK-NEXT: ldr w9, [x0, x8, lsl #2] ; CHECK-NEXT: add w9, w9, #1 -; CHECK-NEXT: str w9, [x0, x8] +; CHECK-NEXT: str w9, [x0, x8, lsl #2] ; CHECK-NEXT: ret %tmp = load i64, ptr %a1, align 8 %tmp1 = lshr i64 %tmp, 21 diff --git a/llvm/test/CodeGen/AArch64/hadd-combine.ll b/llvm/test/CodeGen/AArch64/hadd-combine.ll index 491bf40..c0f7678 100644 --- a/llvm/test/CodeGen/AArch64/hadd-combine.ll +++ b/llvm/test/CodeGen/AArch64/hadd-combine.ll @@ -903,6 +903,58 @@ define <8 x i16> @shadd_fixedwidth_v8i16(<8 x i16> %a0, <8 x i16> %a1) { ret <8 x i16> %res } +define <8 x i16> @shadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) { +; CHECK-LABEL: shadd_demandedelts: +; CHECK: // %bb.0: +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: ret + %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer + %op = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %s0, <8 x i16> %a1) + %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %r0 +} + +define <8 x i16> @srhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) { +; CHECK-LABEL: srhadd_demandedelts: +; CHECK: // %bb.0: +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: ret + %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer + %op = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %s0, <8 x i16> %a1) + %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %r0 +} + +define <8 x i16> @uhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) { +; CHECK-LABEL: uhadd_demandedelts: +; CHECK: // %bb.0: +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: ret + %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer + %op = call <8 x i16> 
@llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %s0, <8 x i16> %a1) + %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %r0 +} + +define <8 x i16> @urhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) { +; CHECK-LABEL: urhadd_demandedelts: +; CHECK: // %bb.0: +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: ret + %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer + %op = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %s0, <8 x i16> %a1) + %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %r0 +} + declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>) declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>) declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>) @@ -927,4 +979,4 @@ declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>) declare <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32>, <4 x i32>) declare <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8>, <16 x i8>) declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>) -declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>)
\ No newline at end of file diff --git a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll index 30123a3..e8dafd5 100644 --- a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll +++ b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll @@ -223,10 +223,9 @@ define i64 @three_dimensional_middle(ptr %a, ptr %b, i64 %N, i64 %M, i64 %K) { ; CHECK-NEXT: // Parent Loop BB3_1 Depth=1 ; CHECK-NEXT: // => This Loop Header: Depth=2 ; CHECK-NEXT: // Child Loop BB3_3 Depth 3 -; CHECK-NEXT: lsl x12, x11, #3 +; CHECK-NEXT: ldr x13, [x1, x11, lsl #3] +; CHECK-NEXT: ldr x12, [x10, x11, lsl #3] ; CHECK-NEXT: mov x14, x4 -; CHECK-NEXT: ldr x13, [x1, x12] -; CHECK-NEXT: ldr x12, [x10, x12] ; CHECK-NEXT: ldr w13, [x13] ; CHECK-NEXT: .LBB3_3: // %for.body8 ; CHECK-NEXT: // Parent Loop BB3_1 Depth=1 diff --git a/llvm/test/CodeGen/AArch64/note-gnu-property-elf-pauthabi.ll b/llvm/test/CodeGen/AArch64/note-gnu-property-elf-pauthabi.ll new file mode 100644 index 0000000..728cffe --- /dev/null +++ b/llvm/test/CodeGen/AArch64/note-gnu-property-elf-pauthabi.ll @@ -0,0 +1,50 @@ +; RUN: rm -rf %t && split-file %s %t && cd %t + +;--- ok.ll + +; RUN: llc -mtriple=aarch64-linux ok.ll -o - | \ +; RUN: FileCheck %s --check-prefix=ASM +; RUN: llc -mtriple=aarch64-linux ok.ll -filetype=obj -o - | \ +; RUN: llvm-readelf --notes - | FileCheck %s --check-prefix=OBJ + +!llvm.module.flags = !{!0, !1} + +!0 = !{i32 1, !"aarch64-elf-pauthabi-platform", i32 268435458} +!1 = !{i32 1, !"aarch64-elf-pauthabi-version", i32 85} + +; ASM: .section .note.gnu.property,"a",@note +; ASM-NEXT: .p2align 3, 0x0 +; ASM-NEXT: .word 4 +; ASM-NEXT: .word 24 +; ASM-NEXT: .word 5 +; ASM-NEXT: .asciz "GNU" +; 3221225473 = 0xc0000001 = GNU_PROPERTY_AARCH64_FEATURE_PAUTH +; ASM-NEXT: .word 3221225473 +; ASM-NEXT: .word 16 +; ASM-NEXT: .xword 268435458 +; ASM-NEXT: .xword 85 + +; OBJ: Displaying notes found in: .note.gnu.property +; OBJ-NEXT: Owner Data size Description +; 
OBJ-NEXT: GNU 0x00000018 NT_GNU_PROPERTY_TYPE_0 (property note) +; OBJ-NEXT: AArch64 PAuth ABI core info: platform 0x10000002 (llvm_linux), version 0x55 (PointerAuthIntrinsics, !PointerAuthCalls, PointerAuthReturns, !PointerAuthAuthTraps, PointerAuthVTPtrAddressDiscrimination, !PointerAuthVTPtrTypeDiscrimination, PointerAuthInitFini) + +; ERR: either both or no 'aarch64-elf-pauthabi-platform' and 'aarch64-elf-pauthabi-version' module flags must be present + +;--- err1.ll + +; RUN: not llc -mtriple=aarch64-linux err1.ll 2>&1 -o - | \ +; RUN: FileCheck %s --check-prefix=ERR + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"aarch64-elf-pauthabi-platform", i32 2} + +;--- err2.ll + +; RUN: not llc -mtriple=aarch64-linux err2.ll 2>&1 -o - | \ +; RUN: FileCheck %s --check-prefix=ERR + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"aarch64-elf-pauthabi-version", i32 31} diff --git a/llvm/test/CodeGen/AArch64/sadd_sat.ll b/llvm/test/CodeGen/AArch64/sadd_sat.ll index 9e09b7f..789fd7b 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat.ll @@ -2,8 +2,6 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for vec - declare i4 @llvm.sadd.sat.i4(i4, i4) declare i8 @llvm.sadd.sat.i8(i8, i8) declare i16 @llvm.sadd.sat.i16(i16, i16) diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll index 6f1ae02..8a0e766 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -2,28 +2,10 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path 
for v16i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v64i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i16 +; CHECK-GI: warning: Instruction selection used fallback path for v2i8 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i16 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i64 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 declare <1 x i8> @llvm.sadd.sat.v1i8(<1 x i8>, <1 x i8>) @@ -67,23 +49,37 @@ define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { } define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind 
{ -; CHECK-LABEL: v32i8: -; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v1.16b, v1.16b, v3.16b -; CHECK-NEXT: sqadd v0.16b, v0.16b, v2.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v32i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqadd v1.16b, v1.16b, v3.16b +; CHECK-SD-NEXT: sqadd v0.16b, v0.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v32i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqadd v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: sqadd v1.16b, v1.16b, v3.16b +; CHECK-GI-NEXT: ret %z = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %x, <32 x i8> %y) ret <32 x i8> %z } define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { -; CHECK-LABEL: v64i8: -; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v2.16b, v2.16b, v6.16b -; CHECK-NEXT: sqadd v0.16b, v0.16b, v4.16b -; CHECK-NEXT: sqadd v1.16b, v1.16b, v5.16b -; CHECK-NEXT: sqadd v3.16b, v3.16b, v7.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v64i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqadd v2.16b, v2.16b, v6.16b +; CHECK-SD-NEXT: sqadd v0.16b, v0.16b, v4.16b +; CHECK-SD-NEXT: sqadd v1.16b, v1.16b, v5.16b +; CHECK-SD-NEXT: sqadd v3.16b, v3.16b, v7.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v64i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqadd v0.16b, v0.16b, v4.16b +; CHECK-GI-NEXT: sqadd v1.16b, v1.16b, v5.16b +; CHECK-GI-NEXT: sqadd v2.16b, v2.16b, v6.16b +; CHECK-GI-NEXT: sqadd v3.16b, v3.16b, v7.16b +; CHECK-GI-NEXT: ret %z = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> %x, <64 x i8> %y) ret <64 x i8> %z } @@ -98,23 +94,37 @@ define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { } define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind { -; CHECK-LABEL: v16i16: -; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v1.8h, v1.8h, v3.8h -; CHECK-NEXT: sqadd v0.8h, v0.8h, v2.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v16i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqadd v1.8h, v1.8h, v3.8h +; CHECK-SD-NEXT: sqadd v0.8h, v0.8h, v2.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v16i16: +; CHECK-GI: // %bb.0: +; 
CHECK-GI-NEXT: sqadd v0.8h, v0.8h, v2.8h +; CHECK-GI-NEXT: sqadd v1.8h, v1.8h, v3.8h +; CHECK-GI-NEXT: ret %z = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %x, <16 x i16> %y) ret <16 x i16> %z } define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind { -; CHECK-LABEL: v32i16: -; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v2.8h, v2.8h, v6.8h -; CHECK-NEXT: sqadd v0.8h, v0.8h, v4.8h -; CHECK-NEXT: sqadd v1.8h, v1.8h, v5.8h -; CHECK-NEXT: sqadd v3.8h, v3.8h, v7.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v32i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqadd v2.8h, v2.8h, v6.8h +; CHECK-SD-NEXT: sqadd v0.8h, v0.8h, v4.8h +; CHECK-SD-NEXT: sqadd v1.8h, v1.8h, v5.8h +; CHECK-SD-NEXT: sqadd v3.8h, v3.8h, v7.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v32i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqadd v0.8h, v0.8h, v4.8h +; CHECK-GI-NEXT: sqadd v1.8h, v1.8h, v5.8h +; CHECK-GI-NEXT: sqadd v2.8h, v2.8h, v6.8h +; CHECK-GI-NEXT: sqadd v3.8h, v3.8h, v7.8h +; CHECK-GI-NEXT: ret %z = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> %x, <32 x i16> %y) ret <32 x i16> %z } @@ -135,19 +145,42 @@ define void @v8i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v4i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: ldr s1, [x1] -; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: shl v1.4h, v1.4h, #8 -; CHECK-NEXT: shl v0.4h, v0.4h, #8 -; CHECK-NEXT: sqadd v0.4h, v0.4h, v1.4h -; CHECK-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b -; CHECK-NEXT: str s0, [x2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v4i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ldr s1, [x1] +; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-SD-NEXT: shl v1.4h, v1.4h, #8 +; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8 +; CHECK-SD-NEXT: sqadd v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8 +; 
CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-SD-NEXT: str s0, [x2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v4i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: ldr w9, [x1] +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: fmov s1, w9 +; CHECK-GI-NEXT: mov b2, v0.b[1] +; CHECK-GI-NEXT: mov b3, v1.b[1] +; CHECK-GI-NEXT: mov b4, v0.b[2] +; CHECK-GI-NEXT: mov b5, v0.b[3] +; CHECK-GI-NEXT: mov b6, v1.b[3] +; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] +; CHECK-GI-NEXT: mov b2, v1.b[2] +; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-NEXT: mov v0.b[2], v4.b[0] +; CHECK-GI-NEXT: mov v1.b[2], v2.b[0] +; CHECK-GI-NEXT: mov v0.b[3], v5.b[0] +; CHECK-GI-NEXT: mov v1.b[3], v6.b[0] +; CHECK-GI-NEXT: sqadd v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: str w8, [x2] +; CHECK-GI-NEXT: ret %x = load <4 x i8>, ptr %px %y = load <4 x i8>, ptr %py %z = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> %x, <4 x i8> %y) @@ -196,23 +229,37 @@ define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v2i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld1 { v0.h }[0], [x0] -; CHECK-NEXT: ld1 { v1.h }[0], [x1] -; CHECK-NEXT: add x8, x0, #2 -; CHECK-NEXT: add x9, x1, #2 -; CHECK-NEXT: ld1 { v0.h }[2], [x8] -; CHECK-NEXT: ld1 { v1.h }[2], [x9] -; CHECK-NEXT: shl v1.2s, v1.2s, #16 -; CHECK-NEXT: shl v0.2s, v0.2s, #16 -; CHECK-NEXT: sqadd v0.2s, v0.2s, v1.2s -; CHECK-NEXT: ushr v0.2s, v0.2s, #16 -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [x2] -; CHECK-NEXT: strh w8, [x2, #2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ld1 { v0.h }[0], [x0] +; CHECK-SD-NEXT: ld1 { v1.h }[0], [x1] +; CHECK-SD-NEXT: add x8, x0, #2 +; CHECK-SD-NEXT: add x9, x1, #2 +; CHECK-SD-NEXT: ld1 { v0.h }[2], [x8] +; CHECK-SD-NEXT: ld1 { v1.h }[2], [x9] +; CHECK-SD-NEXT: shl v1.2s, v1.2s, #16 +; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16 +; 
CHECK-SD-NEXT: sqadd v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: ushr v0.2s, v0.2s, #16 +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strh w9, [x2] +; CHECK-SD-NEXT: strh w8, [x2, #2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr h0, [x0] +; CHECK-GI-NEXT: ldr h1, [x0, #2] +; CHECK-GI-NEXT: ldr h2, [x1] +; CHECK-GI-NEXT: ldr h3, [x1, #2] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: mov v2.h[1], v3.h[0] +; CHECK-GI-NEXT: sqadd v0.4h, v0.4h, v2.4h +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: str h0, [x2] +; CHECK-GI-NEXT: str h1, [x2, #2] +; CHECK-GI-NEXT: ret %x = load <2 x i16>, ptr %px %y = load <2 x i16>, ptr %py %z = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> %x, <2 x i16> %y) @@ -230,15 +277,27 @@ define <12 x i8> @v12i8(<12 x i8> %x, <12 x i8> %y) nounwind { } define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v12i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q3, [x1] -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: sqadd v0.8h, v1.8h, v0.8h -; CHECK-NEXT: sqadd v1.8h, v2.8h, v3.8h -; CHECK-NEXT: str q0, [x2] -; CHECK-NEXT: str d1, [x2, #16] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v12i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldp q0, q3, [x1] +; CHECK-SD-NEXT: ldp q1, q2, [x0] +; CHECK-SD-NEXT: sqadd v0.8h, v1.8h, v0.8h +; CHECK-SD-NEXT: sqadd v1.8h, v2.8h, v3.8h +; CHECK-SD-NEXT: str q0, [x2] +; CHECK-SD-NEXT: str d1, [x2, #16] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v12i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: ldr d2, [x0, #16] +; CHECK-GI-NEXT: ldr d3, [x1, #16] +; CHECK-GI-NEXT: sqadd v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: sqadd v1.4h, v2.4h, v3.4h +; CHECK-GI-NEXT: str q0, [x2] +; CHECK-GI-NEXT: str d1, [x2, #16] +; CHECK-GI-NEXT: ret %x = load <12 x i16>, ptr %px %y = load <12 x i16>, ptr %py %z = call <12 x i16> @llvm.sadd.sat.v12i16(<12 x i16> %x, <12 x i16> %y) 
@@ -346,23 +405,37 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { } define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind { -; CHECK-LABEL: v8i32: -; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v1.4s, v1.4s, v3.4s -; CHECK-NEXT: sqadd v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v8i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqadd v1.4s, v1.4s, v3.4s +; CHECK-SD-NEXT: sqadd v0.4s, v0.4s, v2.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v8i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqadd v0.4s, v0.4s, v2.4s +; CHECK-GI-NEXT: sqadd v1.4s, v1.4s, v3.4s +; CHECK-GI-NEXT: ret %z = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> %x, <8 x i32> %y) ret <8 x i32> %z } define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { -; CHECK-LABEL: v16i32: -; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v2.4s, v2.4s, v6.4s -; CHECK-NEXT: sqadd v0.4s, v0.4s, v4.4s -; CHECK-NEXT: sqadd v1.4s, v1.4s, v5.4s -; CHECK-NEXT: sqadd v3.4s, v3.4s, v7.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v16i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqadd v2.4s, v2.4s, v6.4s +; CHECK-SD-NEXT: sqadd v0.4s, v0.4s, v4.4s +; CHECK-SD-NEXT: sqadd v1.4s, v1.4s, v5.4s +; CHECK-SD-NEXT: sqadd v3.4s, v3.4s, v7.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v16i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqadd v0.4s, v0.4s, v4.4s +; CHECK-GI-NEXT: sqadd v1.4s, v1.4s, v5.4s +; CHECK-GI-NEXT: sqadd v2.4s, v2.4s, v6.4s +; CHECK-GI-NEXT: sqadd v3.4s, v3.4s, v7.4s +; CHECK-GI-NEXT: ret %z = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> %x, <16 x i32> %y) ret <16 x i32> %z } @@ -377,23 +450,37 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind { } define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind { -; CHECK-LABEL: v4i64: -; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v1.2d, v1.2d, v3.2d -; CHECK-NEXT: sqadd v0.2d, v0.2d, v2.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v4i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqadd v1.2d, v1.2d, v3.2d +; CHECK-SD-NEXT: sqadd 
v0.2d, v0.2d, v2.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v4i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqadd v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: sqadd v1.2d, v1.2d, v3.2d +; CHECK-GI-NEXT: ret %z = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> %x, <4 x i64> %y) ret <4 x i64> %z } define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { -; CHECK-LABEL: v8i64: -; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v2.2d, v2.2d, v6.2d -; CHECK-NEXT: sqadd v0.2d, v0.2d, v4.2d -; CHECK-NEXT: sqadd v1.2d, v1.2d, v5.2d -; CHECK-NEXT: sqadd v3.2d, v3.2d, v7.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v8i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqadd v2.2d, v2.2d, v6.2d +; CHECK-SD-NEXT: sqadd v0.2d, v0.2d, v4.2d +; CHECK-SD-NEXT: sqadd v1.2d, v1.2d, v5.2d +; CHECK-SD-NEXT: sqadd v3.2d, v3.2d, v7.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v8i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqadd v0.2d, v0.2d, v4.2d +; CHECK-GI-NEXT: sqadd v1.2d, v1.2d, v5.2d +; CHECK-GI-NEXT: sqadd v2.2d, v2.2d, v6.2d +; CHECK-GI-NEXT: sqadd v3.2d, v3.2d, v7.2d +; CHECK-GI-NEXT: ret %z = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> %x, <8 x i64> %y) ret <8 x i64> %z } diff --git a/llvm/test/CodeGen/AArch64/sink-and-fold.ll b/llvm/test/CodeGen/AArch64/sink-and-fold.ll index 5200722..f65a08a 100644 --- a/llvm/test/CodeGen/AArch64/sink-and-fold.ll +++ b/llvm/test/CodeGen/AArch64/sink-and-fold.ll @@ -100,7 +100,7 @@ exit: } ; Address calculation cheap enough on some cores. 
-define i32 @f3(i1 %c1, ptr %p, i64 %i) nounwind "target-features"="+alu-lsl-fast,+addr-lsl-fast" { +define i32 @f3(i1 %c1, ptr %p, i64 %i) nounwind "target-features"="+alu-lsl-fast" { ; CHECK-LABEL: f3: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: tbz w0, #0, .LBB3_2 @@ -130,7 +130,7 @@ exit: ret i32 %v } -define void @f4(ptr %a, i64 %n) nounwind "target-features"="+alu-lsl-fast,+addr-lsl-fast" { +define void @f4(ptr %a, i64 %n) nounwind "target-features"="+alu-lsl-fast" { ; CHECK-LABEL: f4: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: cmp x1, #1 diff --git a/llvm/test/CodeGen/AArch64/sms-regpress.mir b/llvm/test/CodeGen/AArch64/sms-regpress.mir new file mode 100644 index 0000000..c75eba5 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sms-regpress.mir @@ -0,0 +1,160 @@ +# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-max-mii=40 -pipeliner-register-pressure -pipeliner-ii-search-range=30 -debug-only=pipeliner 2>&1 | FileCheck %s + +# REQUIRES: asserts + +# Check that if the register pressure is too high, the schedule is rejected, II is incremented, and scheduling continues. +# The specific value of II is not important. + +# CHECK: {{^ *}}Try to schedule with {{[0-9]+$}} +# CHECK: {{^ *}}Rejected the schedule because of too high register pressure{{$}} +# CHECK: {{^ *}}Try to schedule with {{[0-9]+$}} +# CHECK: {{^ *}}Schedule Found? 
1 (II={{[0-9]+}}){{$}} + +--- | + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + + define dso_local double @kernel(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, i32 noundef %n) local_unnamed_addr { + entry: + %0 = load double, ptr %a, align 8 + %arrayidx1 = getelementptr inbounds i8, ptr %a, i64 8 + %1 = load double, ptr %arrayidx1, align 8 + %cmp133 = icmp sgt i32 %n, 0 + br i1 %cmp133, label %for.body.preheader, label %for.cond.cleanup + + for.body.preheader: ; preds = %entry + %wide.trip.count = zext nneg i32 %n to i64 + br label %for.body + + for.cond.cleanup: ; preds = %for.body, %entry + %res.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add54, %for.body ] + ret double %res.0.lcssa + + for.body: ; preds = %for.body.preheader, %for.body + %lsr.iv137 = phi i64 [ %wide.trip.count, %for.body.preheader ], [ %lsr.iv.next, %for.body ] + %lsr.iv = phi ptr [ %b, %for.body.preheader ], [ %scevgep, %for.body ] + %res.0135 = phi double [ 0.000000e+00, %for.body.preheader ], [ %add54, %for.body ] + %2 = load double, ptr %lsr.iv, align 8 + %3 = tail call double @llvm.fmuladd.f64(double %0, double %2, double %0) + %4 = tail call double @llvm.fmuladd.f64(double %3, double %2, double %3) + %5 = tail call double @llvm.fmuladd.f64(double %4, double %2, double %4) + %6 = tail call double @llvm.fmuladd.f64(double %5, double %2, double %5) + %7 = tail call double @llvm.fmuladd.f64(double %6, double %2, double %6) + %8 = tail call double @llvm.fmuladd.f64(double %7, double %2, double %7) + %9 = tail call double @llvm.fmuladd.f64(double %8, double %2, double %8) + %10 = tail call double @llvm.fmuladd.f64(double %9, double %2, double %9) + %11 = tail call double @llvm.fmuladd.f64(double %10, double %2, double %10) + %12 = tail call double @llvm.fmuladd.f64(double %11, double %2, double %11) + %13 = tail call double @llvm.fmuladd.f64(double %12, double %2, double %12) + %14 = tail call double @llvm.fmuladd.f64(double %13, double 
%2, double %13) + %15 = tail call double @llvm.fmuladd.f64(double %14, double %2, double %14) + %16 = tail call double @llvm.fmuladd.f64(double %15, double %2, double %15) + %17 = tail call double @llvm.fmuladd.f64(double %16, double %2, double %16) + %18 = tail call double @llvm.fmuladd.f64(double %17, double %2, double %17) + %add = fadd double %17, %18 + %19 = tail call double @llvm.fmuladd.f64(double %18, double %2, double %add) + %add35 = fadd double %10, %19 + %20 = tail call double @llvm.fmuladd.f64(double %3, double %2, double %add35) + %add38 = fadd double %11, %20 + %21 = tail call double @llvm.fmuladd.f64(double %4, double %2, double %add38) + %add41 = fadd double %12, %21 + %22 = tail call double @llvm.fmuladd.f64(double %5, double %2, double %add41) + %add44 = fadd double %14, %15 + %add45 = fadd double %13, %add44 + %add46 = fadd double %add45, %22 + %23 = tail call double @llvm.fmuladd.f64(double %6, double %2, double %add46) + %mul = fmul double %2, %7 + %mul51 = fmul double %1, %mul + %24 = tail call double @llvm.fmuladd.f64(double %mul51, double %9, double %23) + %25 = tail call double @llvm.fmuladd.f64(double %8, double %1, double %24) + %add54 = fadd double %res.0135, %25 + %scevgep = getelementptr i8, ptr %lsr.iv, i64 8 + %lsr.iv.next = add nsw i64 %lsr.iv137, -1 + %exitcond.not = icmp eq i64 %lsr.iv.next, 0 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + } + + declare double @llvm.fmuladd.f64(double, double, double) + +... 
+--- +name: kernel +tracksRegLiveness: true +liveins: + - { reg: '$x0', virtual-reg: '%10' } + - { reg: '$x1', virtual-reg: '%11' } + - { reg: '$w2', virtual-reg: '%12' } +body: | + bb.0.entry: + successors: %bb.1, %bb.4 + liveins: $x0, $x1, $w2 + + %12:gpr32common = COPY $w2 + %11:gpr64 = COPY $x1 + %10:gpr64common = COPY $x0 + dead $wzr = SUBSWri %12, 1, 0, implicit-def $nzcv + Bcc 10, %bb.1, implicit $nzcv + + bb.4: + %13:fpr64 = FMOVD0 + B %bb.2 + + bb.1.for.body.preheader: + %0:fpr64 = LDRDui %10, 0 :: (load (s64) from %ir.a) + %1:fpr64 = LDRDui %10, 1 :: (load (s64) from %ir.arrayidx1) + %16:gpr32 = ORRWrs $wzr, %12, 0 + %2:gpr64all = SUBREG_TO_REG 0, killed %16, %subreg.sub_32 + %15:fpr64 = FMOVD0 + B %bb.3 + + bb.2.for.cond.cleanup: + %3:fpr64 = PHI %13, %bb.4, %7, %bb.3 + $d0 = COPY %3 + RET_ReallyLR implicit $d0 + + bb.3.for.body: + successors: %bb.2, %bb.3 + + %4:gpr64sp = PHI %2, %bb.1, %9, %bb.3 + %5:gpr64sp = PHI %11, %bb.1, %8, %bb.3 + %6:fpr64 = PHI %15, %bb.1, %7, %bb.3 + early-clobber %17:gpr64sp, %18:fpr64 = LDRDpost %5, 8 :: (load (s64) from %ir.lsr.iv) + %19:fpr64 = nofpexcept FMADDDrrr %0, %18, %0, implicit $fpcr + %20:fpr64 = nofpexcept FMADDDrrr %19, %18, %19, implicit $fpcr + %21:fpr64 = nofpexcept FMADDDrrr %20, %18, %20, implicit $fpcr + %22:fpr64 = nofpexcept FMADDDrrr %21, %18, %21, implicit $fpcr + %23:fpr64 = nofpexcept FMADDDrrr %22, %18, %22, implicit $fpcr + %24:fpr64 = nofpexcept FMADDDrrr %23, %18, %23, implicit $fpcr + %25:fpr64 = nofpexcept FMADDDrrr %24, %18, %24, implicit $fpcr + %26:fpr64 = nofpexcept FMADDDrrr %25, %18, %25, implicit $fpcr + %27:fpr64 = nofpexcept FMADDDrrr %26, %18, %26, implicit $fpcr + %28:fpr64 = nofpexcept FMADDDrrr %27, %18, %27, implicit $fpcr + %29:fpr64 = nofpexcept FMADDDrrr %28, %18, %28, implicit $fpcr + %30:fpr64 = nofpexcept FMADDDrrr %29, %18, %29, implicit $fpcr + %31:fpr64 = nofpexcept FMADDDrrr %30, %18, %30, implicit $fpcr + %32:fpr64 = nofpexcept FMADDDrrr %31, %18, %31, implicit $fpcr + 
%33:fpr64 = nofpexcept FMADDDrrr %32, %18, %32, implicit $fpcr + %34:fpr64 = nofpexcept FMADDDrrr %33, %18, %33, implicit $fpcr + %35:fpr64 = nofpexcept FADDDrr %33, %34, implicit $fpcr + %36:fpr64 = nofpexcept FMADDDrrr %34, %18, killed %35, implicit $fpcr + %37:fpr64 = nofpexcept FADDDrr %26, killed %36, implicit $fpcr + %38:fpr64 = nofpexcept FMADDDrrr %19, %18, killed %37, implicit $fpcr + %39:fpr64 = nofpexcept FADDDrr %27, killed %38, implicit $fpcr + %40:fpr64 = nofpexcept FMADDDrrr %20, %18, killed %39, implicit $fpcr + %41:fpr64 = nofpexcept FADDDrr %28, killed %40, implicit $fpcr + %42:fpr64 = nofpexcept FMADDDrrr %21, %18, killed %41, implicit $fpcr + %43:fpr64 = nofpexcept FADDDrr %30, %31, implicit $fpcr + %44:fpr64 = nofpexcept FADDDrr %29, killed %43, implicit $fpcr + %45:fpr64 = nofpexcept FADDDrr killed %44, killed %42, implicit $fpcr + %46:fpr64 = nofpexcept FMADDDrrr %22, %18, killed %45, implicit $fpcr + %47:fpr64 = nofpexcept FMULDrr %18, %23, implicit $fpcr + %48:fpr64 = nofpexcept FMULDrr %1, killed %47, implicit $fpcr + %49:fpr64 = nofpexcept FMADDDrrr killed %48, %25, killed %46, implicit $fpcr + %50:fpr64 = nofpexcept FMADDDrrr %24, %1, killed %49, implicit $fpcr + %7:fpr64 = nofpexcept FADDDrr %6, killed %50, implicit $fpcr + %8:gpr64all = COPY %17 + %51:gpr64 = nsw SUBSXri %4, 1, 0, implicit-def $nzcv + %9:gpr64all = COPY %51 + Bcc 0, %bb.2, implicit $nzcv + B %bb.3 + +... 
diff --git a/llvm/test/CodeGen/AArch64/ssub_sat.ll b/llvm/test/CodeGen/AArch64/ssub_sat.ll index abeb4b3..4d755f4 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat.ll @@ -2,8 +2,6 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for vec - declare i4 @llvm.ssub.sat.i4(i4, i4) declare i8 @llvm.ssub.sat.i8(i8, i8) declare i16 @llvm.ssub.sat.i16(i16, i16) diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll index d1f843a..a8c1276 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -2,28 +2,10 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for v16i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v64i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i16 +; CHECK-GI: warning: Instruction selection used fallback path for v2i8 ; 
CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i16 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i64 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 declare <1 x i8> @llvm.ssub.sat.v1i8(<1 x i8>, <1 x i8>) @@ -68,23 +50,37 @@ define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { } define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { -; CHECK-LABEL: v32i8: -; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v1.16b, v1.16b, v3.16b -; CHECK-NEXT: sqsub v0.16b, v0.16b, v2.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v32i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqsub v1.16b, v1.16b, v3.16b +; CHECK-SD-NEXT: sqsub v0.16b, v0.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v32i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqsub v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: sqsub v1.16b, v1.16b, v3.16b +; CHECK-GI-NEXT: ret %z = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %x, <32 x i8> %y) ret <32 x i8> %z } define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { -; CHECK-LABEL: v64i8: -; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v2.16b, v2.16b, v6.16b -; CHECK-NEXT: sqsub v0.16b, v0.16b, v4.16b -; CHECK-NEXT: sqsub v1.16b, v1.16b, v5.16b -; CHECK-NEXT: sqsub v3.16b, v3.16b, v7.16b -; 
CHECK-NEXT: ret +; CHECK-SD-LABEL: v64i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqsub v2.16b, v2.16b, v6.16b +; CHECK-SD-NEXT: sqsub v0.16b, v0.16b, v4.16b +; CHECK-SD-NEXT: sqsub v1.16b, v1.16b, v5.16b +; CHECK-SD-NEXT: sqsub v3.16b, v3.16b, v7.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v64i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqsub v0.16b, v0.16b, v4.16b +; CHECK-GI-NEXT: sqsub v1.16b, v1.16b, v5.16b +; CHECK-GI-NEXT: sqsub v2.16b, v2.16b, v6.16b +; CHECK-GI-NEXT: sqsub v3.16b, v3.16b, v7.16b +; CHECK-GI-NEXT: ret %z = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> %x, <64 x i8> %y) ret <64 x i8> %z } @@ -99,23 +95,37 @@ define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { } define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind { -; CHECK-LABEL: v16i16: -; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v1.8h, v1.8h, v3.8h -; CHECK-NEXT: sqsub v0.8h, v0.8h, v2.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v16i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqsub v1.8h, v1.8h, v3.8h +; CHECK-SD-NEXT: sqsub v0.8h, v0.8h, v2.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v16i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqsub v0.8h, v0.8h, v2.8h +; CHECK-GI-NEXT: sqsub v1.8h, v1.8h, v3.8h +; CHECK-GI-NEXT: ret %z = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %x, <16 x i16> %y) ret <16 x i16> %z } define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind { -; CHECK-LABEL: v32i16: -; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v2.8h, v2.8h, v6.8h -; CHECK-NEXT: sqsub v0.8h, v0.8h, v4.8h -; CHECK-NEXT: sqsub v1.8h, v1.8h, v5.8h -; CHECK-NEXT: sqsub v3.8h, v3.8h, v7.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v32i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqsub v2.8h, v2.8h, v6.8h +; CHECK-SD-NEXT: sqsub v0.8h, v0.8h, v4.8h +; CHECK-SD-NEXT: sqsub v1.8h, v1.8h, v5.8h +; CHECK-SD-NEXT: sqsub v3.8h, v3.8h, v7.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v32i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqsub v0.8h, v0.8h, v4.8h +; CHECK-GI-NEXT: 
sqsub v1.8h, v1.8h, v5.8h +; CHECK-GI-NEXT: sqsub v2.8h, v2.8h, v6.8h +; CHECK-GI-NEXT: sqsub v3.8h, v3.8h, v7.8h +; CHECK-GI-NEXT: ret %z = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> %x, <32 x i16> %y) ret <32 x i16> %z } @@ -136,19 +146,42 @@ define void @v8i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v4i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: ldr s1, [x1] -; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: shl v1.4h, v1.4h, #8 -; CHECK-NEXT: shl v0.4h, v0.4h, #8 -; CHECK-NEXT: sqsub v0.4h, v0.4h, v1.4h -; CHECK-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b -; CHECK-NEXT: str s0, [x2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v4i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ldr s1, [x1] +; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-SD-NEXT: shl v1.4h, v1.4h, #8 +; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8 +; CHECK-SD-NEXT: sqsub v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8 +; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-SD-NEXT: str s0, [x2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v4i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: ldr w9, [x1] +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: fmov s1, w9 +; CHECK-GI-NEXT: mov b2, v0.b[1] +; CHECK-GI-NEXT: mov b3, v1.b[1] +; CHECK-GI-NEXT: mov b4, v0.b[2] +; CHECK-GI-NEXT: mov b5, v0.b[3] +; CHECK-GI-NEXT: mov b6, v1.b[3] +; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] +; CHECK-GI-NEXT: mov b2, v1.b[2] +; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-NEXT: mov v0.b[2], v4.b[0] +; CHECK-GI-NEXT: mov v1.b[2], v2.b[0] +; CHECK-GI-NEXT: mov v0.b[3], v5.b[0] +; CHECK-GI-NEXT: mov v1.b[3], v6.b[0] +; CHECK-GI-NEXT: sqsub v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: str w8, [x2] +; CHECK-GI-NEXT: ret %x = load <4 x i8>, ptr %px %y = 
load <4 x i8>, ptr %py %z = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> %x, <4 x i8> %y) @@ -197,23 +230,37 @@ define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v2i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld1 { v0.h }[0], [x0] -; CHECK-NEXT: ld1 { v1.h }[0], [x1] -; CHECK-NEXT: add x8, x0, #2 -; CHECK-NEXT: add x9, x1, #2 -; CHECK-NEXT: ld1 { v0.h }[2], [x8] -; CHECK-NEXT: ld1 { v1.h }[2], [x9] -; CHECK-NEXT: shl v1.2s, v1.2s, #16 -; CHECK-NEXT: shl v0.2s, v0.2s, #16 -; CHECK-NEXT: sqsub v0.2s, v0.2s, v1.2s -; CHECK-NEXT: ushr v0.2s, v0.2s, #16 -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [x2] -; CHECK-NEXT: strh w8, [x2, #2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ld1 { v0.h }[0], [x0] +; CHECK-SD-NEXT: ld1 { v1.h }[0], [x1] +; CHECK-SD-NEXT: add x8, x0, #2 +; CHECK-SD-NEXT: add x9, x1, #2 +; CHECK-SD-NEXT: ld1 { v0.h }[2], [x8] +; CHECK-SD-NEXT: ld1 { v1.h }[2], [x9] +; CHECK-SD-NEXT: shl v1.2s, v1.2s, #16 +; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16 +; CHECK-SD-NEXT: sqsub v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: ushr v0.2s, v0.2s, #16 +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strh w9, [x2] +; CHECK-SD-NEXT: strh w8, [x2, #2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr h0, [x0] +; CHECK-GI-NEXT: ldr h1, [x0, #2] +; CHECK-GI-NEXT: ldr h2, [x1] +; CHECK-GI-NEXT: ldr h3, [x1, #2] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: mov v2.h[1], v3.h[0] +; CHECK-GI-NEXT: sqsub v0.4h, v0.4h, v2.4h +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: str h0, [x2] +; CHECK-GI-NEXT: str h1, [x2, #2] +; CHECK-GI-NEXT: ret %x = load <2 x i16>, ptr %px %y = load <2 x i16>, ptr %py %z = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %x, <2 x i16> %y) @@ -231,15 +278,27 @@ define <12 x i8> @v12i8(<12 x i8> %x, <12 x i8> %y) 
nounwind { } define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v12i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q3, [x1] -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: sqsub v0.8h, v1.8h, v0.8h -; CHECK-NEXT: sqsub v1.8h, v2.8h, v3.8h -; CHECK-NEXT: str q0, [x2] -; CHECK-NEXT: str d1, [x2, #16] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v12i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldp q0, q3, [x1] +; CHECK-SD-NEXT: ldp q1, q2, [x0] +; CHECK-SD-NEXT: sqsub v0.8h, v1.8h, v0.8h +; CHECK-SD-NEXT: sqsub v1.8h, v2.8h, v3.8h +; CHECK-SD-NEXT: str q0, [x2] +; CHECK-SD-NEXT: str d1, [x2, #16] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v12i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: ldr d2, [x0, #16] +; CHECK-GI-NEXT: ldr d3, [x1, #16] +; CHECK-GI-NEXT: sqsub v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: sqsub v1.4h, v2.4h, v3.4h +; CHECK-GI-NEXT: str q0, [x2] +; CHECK-GI-NEXT: str d1, [x2, #16] +; CHECK-GI-NEXT: ret %x = load <12 x i16>, ptr %px %y = load <12 x i16>, ptr %py %z = call <12 x i16> @llvm.ssub.sat.v12i16(<12 x i16> %x, <12 x i16> %y) @@ -349,23 +408,37 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { } define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind { -; CHECK-LABEL: v8i32: -; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v1.4s, v1.4s, v3.4s -; CHECK-NEXT: sqsub v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v8i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqsub v1.4s, v1.4s, v3.4s +; CHECK-SD-NEXT: sqsub v0.4s, v0.4s, v2.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v8i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqsub v0.4s, v0.4s, v2.4s +; CHECK-GI-NEXT: sqsub v1.4s, v1.4s, v3.4s +; CHECK-GI-NEXT: ret %z = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> %x, <8 x i32> %y) ret <8 x i32> %z } define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { -; CHECK-LABEL: v16i32: -; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v2.4s, v2.4s, v6.4s -; 
CHECK-NEXT: sqsub v0.4s, v0.4s, v4.4s -; CHECK-NEXT: sqsub v1.4s, v1.4s, v5.4s -; CHECK-NEXT: sqsub v3.4s, v3.4s, v7.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v16i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqsub v2.4s, v2.4s, v6.4s +; CHECK-SD-NEXT: sqsub v0.4s, v0.4s, v4.4s +; CHECK-SD-NEXT: sqsub v1.4s, v1.4s, v5.4s +; CHECK-SD-NEXT: sqsub v3.4s, v3.4s, v7.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v16i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqsub v0.4s, v0.4s, v4.4s +; CHECK-GI-NEXT: sqsub v1.4s, v1.4s, v5.4s +; CHECK-GI-NEXT: sqsub v2.4s, v2.4s, v6.4s +; CHECK-GI-NEXT: sqsub v3.4s, v3.4s, v7.4s +; CHECK-GI-NEXT: ret %z = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> %x, <16 x i32> %y) ret <16 x i32> %z } @@ -380,23 +453,37 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind { } define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind { -; CHECK-LABEL: v4i64: -; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v1.2d, v1.2d, v3.2d -; CHECK-NEXT: sqsub v0.2d, v0.2d, v2.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v4i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqsub v1.2d, v1.2d, v3.2d +; CHECK-SD-NEXT: sqsub v0.2d, v0.2d, v2.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v4i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqsub v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: sqsub v1.2d, v1.2d, v3.2d +; CHECK-GI-NEXT: ret %z = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> %x, <4 x i64> %y) ret <4 x i64> %z } define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { -; CHECK-LABEL: v8i64: -; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v2.2d, v2.2d, v6.2d -; CHECK-NEXT: sqsub v0.2d, v0.2d, v4.2d -; CHECK-NEXT: sqsub v1.2d, v1.2d, v5.2d -; CHECK-NEXT: sqsub v3.2d, v3.2d, v7.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v8i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqsub v2.2d, v2.2d, v6.2d +; CHECK-SD-NEXT: sqsub v0.2d, v0.2d, v4.2d +; CHECK-SD-NEXT: sqsub v1.2d, v1.2d, v5.2d +; CHECK-SD-NEXT: sqsub v3.2d, v3.2d, v7.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v8i64: 
+; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqsub v0.2d, v0.2d, v4.2d +; CHECK-GI-NEXT: sqsub v1.2d, v1.2d, v5.2d +; CHECK-GI-NEXT: sqsub v2.2d, v2.2d, v6.2d +; CHECK-GI-NEXT: sqsub v3.2d, v3.2d, v7.2d +; CHECK-GI-NEXT: ret %z = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> %x, <8 x i64> %y) ret <8 x i64> %z } diff --git a/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll b/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll new file mode 100644 index 0000000..bcfc7b3 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll @@ -0,0 +1,75 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +define <4 x i32> @masked_load_v4i32(ptr %a, <4 x i1> %mask) nounwind { +; CHECK-LABEL: masked_load_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: shl v0.4s, v0.4s, #31 +; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 +; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %load = call <4 x i32> @llvm.masked.load.v4i32(ptr %a, i32 1, <4 x i1> %mask, <4 x i32> undef), !nontemporal !0 + ret <4 x i32> %load +} + +define void @masked_store_v4i32(<4 x i32> %x, ptr %a, <4 x i1> %mask) nounwind { +; CHECK-LABEL: masked_store_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: shl v1.4s, v1.4s, #31 +; CHECK-NEXT: cmlt v1.4s, v1.4s, #0 +; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0 +; CHECK-NEXT: st1w { z0.s }, p0, [x0] +; CHECK-NEXT: ret + call void @llvm.masked.store.v4i32.p0(<4 x i32> %x, ptr %a, i32 1, <4 x i1> %mask), !nontemporal !0 + ret void +} + +define <4 x i32> @load_v4i32(ptr %a) nounwind { +; CHECK-LABEL: load_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; 
CHECK-NEXT: ret + %load = call <4 x i32> @llvm.masked.load.v4i32(ptr %a, i32 1, <4 x i1> <i1 1, i1 1, i1 1, i1 1>, <4 x i32> undef), !nontemporal !0 + ret <4 x i32> %load +} + +define void @store_v4i32(<4 x i32> %x, ptr %a) nounwind { +; CHECK-LABEL: store_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret + call void @llvm.masked.store.v4i32.p0(<4 x i32> %x, ptr %a, i32 1, <4 x i1> <i1 1, i1 1, i1 1, i1 1>), !nontemporal !0 + ret void +} + +define <vscale x 4 x i32> @masked_load_nxv4i32(ptr %a, <vscale x 4 x i1> %mask) nounwind { +; CHECK-LABEL: masked_load_nxv4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32(ptr %a, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef), !nontemporal !0 + ret <vscale x 4 x i32> %load +} + +define void @masked_store_nxv4i32(<vscale x 4 x i32> %x, ptr %a, <vscale x 4 x i1> %mask) nounwind { +; CHECK-LABEL: masked_store_nxv4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: st1w { z0.s }, p0, [x0] +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> %x, ptr %a, i32 1, <vscale x 4 x i1> %mask), !nontemporal !0 + ret void +} + +declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i32>) +declare void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32>, ptr, i32, <vscale x 4 x i1>) +declare <4 x i32> @llvm.masked.load.v4i32(ptr, i32, <4 x i1>, <4 x i32>) +declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32, <4 x i1>) + +!0 = !{i32 1} diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll index f0bbed5..30ff700 100644 --- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -2,28 +2,10 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s 
--check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for v16i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v64i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i16 +; CHECK-GI: warning: Instruction selection used fallback path for v2i8 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i16 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i64 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 declare <1 x i8> @llvm.uadd.sat.v1i8(<1 x i8>, <1 x i8>) @@ -67,23 +49,37 @@ define <16 x i8> @v16i8(<16 x 
i8> %x, <16 x i8> %y) nounwind { } define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { -; CHECK-LABEL: v32i8: -; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v1.16b, v1.16b, v3.16b -; CHECK-NEXT: uqadd v0.16b, v0.16b, v2.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v32i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqadd v1.16b, v1.16b, v3.16b +; CHECK-SD-NEXT: uqadd v0.16b, v0.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v32i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqadd v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: uqadd v1.16b, v1.16b, v3.16b +; CHECK-GI-NEXT: ret %z = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %x, <32 x i8> %y) ret <32 x i8> %z } define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { -; CHECK-LABEL: v64i8: -; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v2.16b, v2.16b, v6.16b -; CHECK-NEXT: uqadd v0.16b, v0.16b, v4.16b -; CHECK-NEXT: uqadd v1.16b, v1.16b, v5.16b -; CHECK-NEXT: uqadd v3.16b, v3.16b, v7.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v64i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqadd v2.16b, v2.16b, v6.16b +; CHECK-SD-NEXT: uqadd v0.16b, v0.16b, v4.16b +; CHECK-SD-NEXT: uqadd v1.16b, v1.16b, v5.16b +; CHECK-SD-NEXT: uqadd v3.16b, v3.16b, v7.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v64i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqadd v0.16b, v0.16b, v4.16b +; CHECK-GI-NEXT: uqadd v1.16b, v1.16b, v5.16b +; CHECK-GI-NEXT: uqadd v2.16b, v2.16b, v6.16b +; CHECK-GI-NEXT: uqadd v3.16b, v3.16b, v7.16b +; CHECK-GI-NEXT: ret %z = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> %x, <64 x i8> %y) ret <64 x i8> %z } @@ -98,23 +94,37 @@ define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { } define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind { -; CHECK-LABEL: v16i16: -; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v1.8h, v1.8h, v3.8h -; CHECK-NEXT: uqadd v0.8h, v0.8h, v2.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v16i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqadd v1.8h, v1.8h, v3.8h +; CHECK-SD-NEXT: uqadd 
v0.8h, v0.8h, v2.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v16i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqadd v0.8h, v0.8h, v2.8h +; CHECK-GI-NEXT: uqadd v1.8h, v1.8h, v3.8h +; CHECK-GI-NEXT: ret %z = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %x, <16 x i16> %y) ret <16 x i16> %z } define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind { -; CHECK-LABEL: v32i16: -; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v2.8h, v2.8h, v6.8h -; CHECK-NEXT: uqadd v0.8h, v0.8h, v4.8h -; CHECK-NEXT: uqadd v1.8h, v1.8h, v5.8h -; CHECK-NEXT: uqadd v3.8h, v3.8h, v7.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v32i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqadd v2.8h, v2.8h, v6.8h +; CHECK-SD-NEXT: uqadd v0.8h, v0.8h, v4.8h +; CHECK-SD-NEXT: uqadd v1.8h, v1.8h, v5.8h +; CHECK-SD-NEXT: uqadd v3.8h, v3.8h, v7.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v32i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqadd v0.8h, v0.8h, v4.8h +; CHECK-GI-NEXT: uqadd v1.8h, v1.8h, v5.8h +; CHECK-GI-NEXT: uqadd v2.8h, v2.8h, v6.8h +; CHECK-GI-NEXT: uqadd v3.8h, v3.8h, v7.8h +; CHECK-GI-NEXT: ret %z = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> %x, <32 x i16> %y) ret <32 x i16> %z } @@ -135,16 +145,39 @@ define void @v8i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v4i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s1, [x0] -; CHECK-NEXT: ldr s2, [x1] -; CHECK-NEXT: movi d0, #0xff00ff00ff00ff -; CHECK-NEXT: uaddl v1.8h, v1.8b, v2.8b -; CHECK-NEXT: umin v0.4h, v1.4h, v0.4h -; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b -; CHECK-NEXT: str s0, [x2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v4i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s1, [x0] +; CHECK-SD-NEXT: ldr s2, [x1] +; CHECK-SD-NEXT: movi d0, #0xff00ff00ff00ff +; CHECK-SD-NEXT: uaddl v1.8h, v1.8b, v2.8b +; CHECK-SD-NEXT: umin v0.4h, v1.4h, v0.4h +; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-SD-NEXT: str s0, [x2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v4i8: +; 
CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: ldr w9, [x1] +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: fmov s1, w9 +; CHECK-GI-NEXT: mov b2, v0.b[1] +; CHECK-GI-NEXT: mov b3, v1.b[1] +; CHECK-GI-NEXT: mov b4, v0.b[2] +; CHECK-GI-NEXT: mov b5, v0.b[3] +; CHECK-GI-NEXT: mov b6, v1.b[3] +; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] +; CHECK-GI-NEXT: mov b2, v1.b[2] +; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-NEXT: mov v0.b[2], v4.b[0] +; CHECK-GI-NEXT: mov v1.b[2], v2.b[0] +; CHECK-GI-NEXT: mov v0.b[3], v5.b[0] +; CHECK-GI-NEXT: mov v1.b[3], v6.b[0] +; CHECK-GI-NEXT: uqadd v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: str w8, [x2] +; CHECK-GI-NEXT: ret %x = load <4 x i8>, ptr %px %y = load <4 x i8>, ptr %py %z = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> %x, <4 x i8> %y) @@ -194,24 +227,38 @@ define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v2i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: ldrh w9, [x1] -; CHECK-NEXT: movi d2, #0x00ffff0000ffff -; CHECK-NEXT: ldrh w10, [x0, #2] -; CHECK-NEXT: ldrh w11, [x1, #2] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: mov v0.s[1], w10 -; CHECK-NEXT: mov v1.s[1], w11 -; CHECK-NEXT: add v0.2s, v0.2s, v1.2s -; CHECK-NEXT: umin v0.2s, v0.2s, v2.2s -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [x2] -; CHECK-NEXT: strh w8, [x2, #2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldrh w8, [x0] +; CHECK-SD-NEXT: ldrh w9, [x1] +; CHECK-SD-NEXT: movi d2, #0x00ffff0000ffff +; CHECK-SD-NEXT: ldrh w10, [x0, #2] +; CHECK-SD-NEXT: ldrh w11, [x1, #2] +; CHECK-SD-NEXT: fmov s0, w8 +; CHECK-SD-NEXT: fmov s1, w9 +; CHECK-SD-NEXT: mov v0.s[1], w10 +; CHECK-SD-NEXT: mov v1.s[1], w11 +; CHECK-SD-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: umin v0.2s, v0.2s, v2.2s +; CHECK-SD-NEXT: mov w8, v0.s[1] +; 
CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strh w9, [x2] +; CHECK-SD-NEXT: strh w8, [x2, #2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr h0, [x0] +; CHECK-GI-NEXT: ldr h1, [x0, #2] +; CHECK-GI-NEXT: ldr h2, [x1] +; CHECK-GI-NEXT: ldr h3, [x1, #2] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: mov v2.h[1], v3.h[0] +; CHECK-GI-NEXT: uqadd v0.4h, v0.4h, v2.4h +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: str h0, [x2] +; CHECK-GI-NEXT: str h1, [x2, #2] +; CHECK-GI-NEXT: ret %x = load <2 x i16>, ptr %px %y = load <2 x i16>, ptr %py %z = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> %x, <2 x i16> %y) @@ -229,15 +276,27 @@ define <12 x i8> @v12i8(<12 x i8> %x, <12 x i8> %y) nounwind { } define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v12i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q3, [x1] -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: uqadd v0.8h, v1.8h, v0.8h -; CHECK-NEXT: uqadd v1.8h, v2.8h, v3.8h -; CHECK-NEXT: str q0, [x2] -; CHECK-NEXT: str d1, [x2, #16] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v12i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldp q0, q3, [x1] +; CHECK-SD-NEXT: ldp q1, q2, [x0] +; CHECK-SD-NEXT: uqadd v0.8h, v1.8h, v0.8h +; CHECK-SD-NEXT: uqadd v1.8h, v2.8h, v3.8h +; CHECK-SD-NEXT: str q0, [x2] +; CHECK-SD-NEXT: str d1, [x2, #16] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v12i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: ldr d2, [x0, #16] +; CHECK-GI-NEXT: ldr d3, [x1, #16] +; CHECK-GI-NEXT: uqadd v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: uqadd v1.4h, v2.4h, v3.4h +; CHECK-GI-NEXT: str q0, [x2] +; CHECK-GI-NEXT: str d1, [x2, #16] +; CHECK-GI-NEXT: ret %x = load <12 x i16>, ptr %px %y = load <12 x i16>, ptr %py %z = call <12 x i16> @llvm.uadd.sat.v12i16(<12 x i16> %x, <12 x i16> %y) @@ -336,23 +395,37 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { } define <8 x i32> @v8i32(<8 x 
i32> %x, <8 x i32> %y) nounwind { -; CHECK-LABEL: v8i32: -; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v1.4s, v1.4s, v3.4s -; CHECK-NEXT: uqadd v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v8i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqadd v1.4s, v1.4s, v3.4s +; CHECK-SD-NEXT: uqadd v0.4s, v0.4s, v2.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v8i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqadd v0.4s, v0.4s, v2.4s +; CHECK-GI-NEXT: uqadd v1.4s, v1.4s, v3.4s +; CHECK-GI-NEXT: ret %z = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> %x, <8 x i32> %y) ret <8 x i32> %z } define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { -; CHECK-LABEL: v16i32: -; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v2.4s, v2.4s, v6.4s -; CHECK-NEXT: uqadd v0.4s, v0.4s, v4.4s -; CHECK-NEXT: uqadd v1.4s, v1.4s, v5.4s -; CHECK-NEXT: uqadd v3.4s, v3.4s, v7.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v16i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqadd v2.4s, v2.4s, v6.4s +; CHECK-SD-NEXT: uqadd v0.4s, v0.4s, v4.4s +; CHECK-SD-NEXT: uqadd v1.4s, v1.4s, v5.4s +; CHECK-SD-NEXT: uqadd v3.4s, v3.4s, v7.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v16i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqadd v0.4s, v0.4s, v4.4s +; CHECK-GI-NEXT: uqadd v1.4s, v1.4s, v5.4s +; CHECK-GI-NEXT: uqadd v2.4s, v2.4s, v6.4s +; CHECK-GI-NEXT: uqadd v3.4s, v3.4s, v7.4s +; CHECK-GI-NEXT: ret %z = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> %x, <16 x i32> %y) ret <16 x i32> %z } @@ -367,23 +440,37 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind { } define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind { -; CHECK-LABEL: v4i64: -; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v1.2d, v1.2d, v3.2d -; CHECK-NEXT: uqadd v0.2d, v0.2d, v2.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v4i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqadd v1.2d, v1.2d, v3.2d +; CHECK-SD-NEXT: uqadd v0.2d, v0.2d, v2.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v4i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: 
uqadd v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: uqadd v1.2d, v1.2d, v3.2d +; CHECK-GI-NEXT: ret %z = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> %x, <4 x i64> %y) ret <4 x i64> %z } define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { -; CHECK-LABEL: v8i64: -; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v2.2d, v2.2d, v6.2d -; CHECK-NEXT: uqadd v0.2d, v0.2d, v4.2d -; CHECK-NEXT: uqadd v1.2d, v1.2d, v5.2d -; CHECK-NEXT: uqadd v3.2d, v3.2d, v7.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v8i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqadd v2.2d, v2.2d, v6.2d +; CHECK-SD-NEXT: uqadd v0.2d, v0.2d, v4.2d +; CHECK-SD-NEXT: uqadd v1.2d, v1.2d, v5.2d +; CHECK-SD-NEXT: uqadd v3.2d, v3.2d, v7.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v8i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqadd v0.2d, v0.2d, v4.2d +; CHECK-GI-NEXT: uqadd v1.2d, v1.2d, v5.2d +; CHECK-GI-NEXT: uqadd v2.2d, v2.2d, v6.2d +; CHECK-GI-NEXT: uqadd v3.2d, v3.2d, v7.2d +; CHECK-GI-NEXT: ret %z = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> %x, <8 x i64> %y) ret <8 x i64> %z } diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll index 82c0327..3bc2796 100644 --- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -2,28 +2,10 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for v16i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v64i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i16 -; CHECK-GI-NEXT: warning: 
Instruction selection used fallback path for v8i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i16 +; CHECK-GI: warning: Instruction selection used fallback path for v2i8 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i16 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i64 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 declare <1 x i8> @llvm.usub.sat.v1i8(<1 x i8>, <1 x i8>) @@ -68,23 +50,37 @@ define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { } define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { -; CHECK-LABEL: v32i8: -; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v1.16b, v1.16b, v3.16b -; CHECK-NEXT: uqsub v0.16b, v0.16b, v2.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v32i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqsub v1.16b, v1.16b, v3.16b +; CHECK-SD-NEXT: uqsub v0.16b, v0.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v32i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqsub v0.16b, v0.16b, v2.16b +; 
CHECK-GI-NEXT: uqsub v1.16b, v1.16b, v3.16b +; CHECK-GI-NEXT: ret %z = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %x, <32 x i8> %y) ret <32 x i8> %z } define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { -; CHECK-LABEL: v64i8: -; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v2.16b, v2.16b, v6.16b -; CHECK-NEXT: uqsub v0.16b, v0.16b, v4.16b -; CHECK-NEXT: uqsub v1.16b, v1.16b, v5.16b -; CHECK-NEXT: uqsub v3.16b, v3.16b, v7.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v64i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqsub v2.16b, v2.16b, v6.16b +; CHECK-SD-NEXT: uqsub v0.16b, v0.16b, v4.16b +; CHECK-SD-NEXT: uqsub v1.16b, v1.16b, v5.16b +; CHECK-SD-NEXT: uqsub v3.16b, v3.16b, v7.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v64i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqsub v0.16b, v0.16b, v4.16b +; CHECK-GI-NEXT: uqsub v1.16b, v1.16b, v5.16b +; CHECK-GI-NEXT: uqsub v2.16b, v2.16b, v6.16b +; CHECK-GI-NEXT: uqsub v3.16b, v3.16b, v7.16b +; CHECK-GI-NEXT: ret %z = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> %x, <64 x i8> %y) ret <64 x i8> %z } @@ -99,23 +95,37 @@ define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { } define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind { -; CHECK-LABEL: v16i16: -; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v1.8h, v1.8h, v3.8h -; CHECK-NEXT: uqsub v0.8h, v0.8h, v2.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v16i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqsub v1.8h, v1.8h, v3.8h +; CHECK-SD-NEXT: uqsub v0.8h, v0.8h, v2.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v16i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqsub v0.8h, v0.8h, v2.8h +; CHECK-GI-NEXT: uqsub v1.8h, v1.8h, v3.8h +; CHECK-GI-NEXT: ret %z = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %x, <16 x i16> %y) ret <16 x i16> %z } define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind { -; CHECK-LABEL: v32i16: -; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v2.8h, v2.8h, v6.8h -; CHECK-NEXT: uqsub v0.8h, v0.8h, v4.8h -; CHECK-NEXT: uqsub v1.8h, 
v1.8h, v5.8h -; CHECK-NEXT: uqsub v3.8h, v3.8h, v7.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v32i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqsub v2.8h, v2.8h, v6.8h +; CHECK-SD-NEXT: uqsub v0.8h, v0.8h, v4.8h +; CHECK-SD-NEXT: uqsub v1.8h, v1.8h, v5.8h +; CHECK-SD-NEXT: uqsub v3.8h, v3.8h, v7.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v32i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqsub v0.8h, v0.8h, v4.8h +; CHECK-GI-NEXT: uqsub v1.8h, v1.8h, v5.8h +; CHECK-GI-NEXT: uqsub v2.8h, v2.8h, v6.8h +; CHECK-GI-NEXT: uqsub v3.8h, v3.8h, v7.8h +; CHECK-GI-NEXT: ret %z = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %x, <32 x i16> %y) ret <32 x i16> %z } @@ -136,16 +146,39 @@ define void @v8i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v4i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: ldr s1, [x1] -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: uqsub v0.4h, v0.4h, v1.4h -; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b -; CHECK-NEXT: str s0, [x2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v4i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ldr s1, [x1] +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-SD-NEXT: uqsub v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-SD-NEXT: str s0, [x2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v4i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: ldr w9, [x1] +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: fmov s1, w9 +; CHECK-GI-NEXT: mov b2, v0.b[1] +; CHECK-GI-NEXT: mov b3, v1.b[1] +; CHECK-GI-NEXT: mov b4, v0.b[2] +; CHECK-GI-NEXT: mov b5, v0.b[3] +; CHECK-GI-NEXT: mov b6, v1.b[3] +; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] +; CHECK-GI-NEXT: mov b2, v1.b[2] +; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-NEXT: mov v0.b[2], v4.b[0] +; CHECK-GI-NEXT: mov v1.b[2], v2.b[0] +; CHECK-GI-NEXT: mov v0.b[3], 
v5.b[0] +; CHECK-GI-NEXT: mov v1.b[3], v6.b[0] +; CHECK-GI-NEXT: uqsub v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: str w8, [x2] +; CHECK-GI-NEXT: ret %x = load <4 x i8>, ptr %px %y = load <4 x i8>, ptr %py %z = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> %x, <4 x i8> %y) @@ -193,22 +226,36 @@ define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v2i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: ldrh w9, [x1] -; CHECK-NEXT: ldrh w10, [x0, #2] -; CHECK-NEXT: ldrh w11, [x1, #2] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: mov v0.s[1], w10 -; CHECK-NEXT: mov v1.s[1], w11 -; CHECK-NEXT: uqsub v0.2s, v0.2s, v1.2s -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [x2] -; CHECK-NEXT: strh w8, [x2, #2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldrh w8, [x0] +; CHECK-SD-NEXT: ldrh w9, [x1] +; CHECK-SD-NEXT: ldrh w10, [x0, #2] +; CHECK-SD-NEXT: ldrh w11, [x1, #2] +; CHECK-SD-NEXT: fmov s0, w8 +; CHECK-SD-NEXT: fmov s1, w9 +; CHECK-SD-NEXT: mov v0.s[1], w10 +; CHECK-SD-NEXT: mov v1.s[1], w11 +; CHECK-SD-NEXT: uqsub v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strh w9, [x2] +; CHECK-SD-NEXT: strh w8, [x2, #2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr h0, [x0] +; CHECK-GI-NEXT: ldr h1, [x0, #2] +; CHECK-GI-NEXT: ldr h2, [x1] +; CHECK-GI-NEXT: ldr h3, [x1, #2] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: mov v2.h[1], v3.h[0] +; CHECK-GI-NEXT: uqsub v0.4h, v0.4h, v2.4h +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: str h0, [x2] +; CHECK-GI-NEXT: str h1, [x2, #2] +; CHECK-GI-NEXT: ret %x = load <2 x i16>, ptr %px %y = load <2 x i16>, ptr %py %z = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %x, <2 x i16> %y) @@ -226,15 +273,27 @@ define 
<12 x i8> @v12i8(<12 x i8> %x, <12 x i8> %y) nounwind { } define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v12i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q3, [x1] -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: uqsub v0.8h, v1.8h, v0.8h -; CHECK-NEXT: uqsub v1.8h, v2.8h, v3.8h -; CHECK-NEXT: str q0, [x2] -; CHECK-NEXT: str d1, [x2, #16] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v12i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldp q0, q3, [x1] +; CHECK-SD-NEXT: ldp q1, q2, [x0] +; CHECK-SD-NEXT: uqsub v0.8h, v1.8h, v0.8h +; CHECK-SD-NEXT: uqsub v1.8h, v2.8h, v3.8h +; CHECK-SD-NEXT: str q0, [x2] +; CHECK-SD-NEXT: str d1, [x2, #16] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v12i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: ldr d2, [x0, #16] +; CHECK-GI-NEXT: ldr d3, [x1, #16] +; CHECK-GI-NEXT: uqsub v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: uqsub v1.4h, v2.4h, v3.4h +; CHECK-GI-NEXT: str q0, [x2] +; CHECK-GI-NEXT: str d1, [x2, #16] +; CHECK-GI-NEXT: ret %x = load <12 x i16>, ptr %px %y = load <12 x i16>, ptr %py %z = call <12 x i16> @llvm.usub.sat.v12i16(<12 x i16> %x, <12 x i16> %y) @@ -334,23 +393,37 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { } define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind { -; CHECK-LABEL: v8i32: -; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v1.4s, v1.4s, v3.4s -; CHECK-NEXT: uqsub v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v8i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqsub v1.4s, v1.4s, v3.4s +; CHECK-SD-NEXT: uqsub v0.4s, v0.4s, v2.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v8i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqsub v0.4s, v0.4s, v2.4s +; CHECK-GI-NEXT: uqsub v1.4s, v1.4s, v3.4s +; CHECK-GI-NEXT: ret %z = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> %x, <8 x i32> %y) ret <8 x i32> %z } define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { -; CHECK-LABEL: v16i32: -; CHECK: // %bb.0: -; 
CHECK-NEXT: uqsub v2.4s, v2.4s, v6.4s -; CHECK-NEXT: uqsub v0.4s, v0.4s, v4.4s -; CHECK-NEXT: uqsub v1.4s, v1.4s, v5.4s -; CHECK-NEXT: uqsub v3.4s, v3.4s, v7.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v16i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqsub v2.4s, v2.4s, v6.4s +; CHECK-SD-NEXT: uqsub v0.4s, v0.4s, v4.4s +; CHECK-SD-NEXT: uqsub v1.4s, v1.4s, v5.4s +; CHECK-SD-NEXT: uqsub v3.4s, v3.4s, v7.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v16i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqsub v0.4s, v0.4s, v4.4s +; CHECK-GI-NEXT: uqsub v1.4s, v1.4s, v5.4s +; CHECK-GI-NEXT: uqsub v2.4s, v2.4s, v6.4s +; CHECK-GI-NEXT: uqsub v3.4s, v3.4s, v7.4s +; CHECK-GI-NEXT: ret %z = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> %x, <16 x i32> %y) ret <16 x i32> %z } @@ -365,23 +438,37 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind { } define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind { -; CHECK-LABEL: v4i64: -; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v1.2d, v1.2d, v3.2d -; CHECK-NEXT: uqsub v0.2d, v0.2d, v2.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v4i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqsub v1.2d, v1.2d, v3.2d +; CHECK-SD-NEXT: uqsub v0.2d, v0.2d, v2.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v4i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqsub v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: uqsub v1.2d, v1.2d, v3.2d +; CHECK-GI-NEXT: ret %z = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> %x, <4 x i64> %y) ret <4 x i64> %z } define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { -; CHECK-LABEL: v8i64: -; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v2.2d, v2.2d, v6.2d -; CHECK-NEXT: uqsub v0.2d, v0.2d, v4.2d -; CHECK-NEXT: uqsub v1.2d, v1.2d, v5.2d -; CHECK-NEXT: uqsub v3.2d, v3.2d, v7.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v8i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqsub v2.2d, v2.2d, v6.2d +; CHECK-SD-NEXT: uqsub v0.2d, v0.2d, v4.2d +; CHECK-SD-NEXT: uqsub v1.2d, v1.2d, v5.2d +; CHECK-SD-NEXT: uqsub v3.2d, v3.2d, v7.2d +; 
CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v8i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqsub v0.2d, v0.2d, v4.2d +; CHECK-GI-NEXT: uqsub v1.2d, v1.2d, v5.2d +; CHECK-GI-NEXT: uqsub v2.2d, v2.2d, v6.2d +; CHECK-GI-NEXT: uqsub v3.2d, v3.2d, v7.2d +; CHECK-GI-NEXT: ret %z = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> %x, <8 x i64> %y) ret <8 x i64> %z } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll index c25b0f2..78d9084 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll @@ -16,7 +16,6 @@ define i32 @divergent_if_swap_brtarget_order0(i32 %value) { ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: .LBB0_2: ; %endif ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] -; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: %c = icmp ne i32 %value, 0 @@ -44,7 +43,6 @@ define i32 @divergent_if_swap_brtarget_order1(i32 %value) { ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: .LBB1_2: ; %endif ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] -; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: %c = icmp ne i32 %value, 0 @@ -74,7 +72,6 @@ define i32 @divergent_if_nonboolean_condition0(i32 %value) { ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: .LBB2_2: ; %endif ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] -; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: %c = trunc i32 %value to i1 @@ -106,7 +103,6 @@ define i32 @divergent_if_nonboolean_condition1(ptr addrspace(1) %ptr) { ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: .LBB3_2: ; %endif ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] -; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: %value = load i32, ptr addrspace(1) %ptr diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll index 
303dc46..5c22d5b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll @@ -131,8 +131,6 @@ define amdgpu_kernel void @is_private_sgpr(ptr %ptr) { ; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: .LBB1_2: ; %bb1 -; GFX11-NEXT: s_nop 0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %val = call i1 @llvm.amdgcn.is.private(ptr %ptr) br i1 %val, label %bb0, label %bb1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll index 63702d2..e005c38 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll @@ -131,8 +131,6 @@ define amdgpu_kernel void @is_local_sgpr(ptr %ptr) { ; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: .LBB1_2: ; %bb1 -; GFX11-NEXT: s_nop 0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %val = call i1 @llvm.amdgcn.is.shared(ptr %ptr) br i1 %val, label %bb0, label %bb1 diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll index 352adac..af6f6913 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll @@ -39,9 +39,9 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v1 ; GFX7LESS-NEXT: v_mad_u32_u24 v0, v0, 5, s4 +; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7LESS-NEXT: s_endpgm ; @@ -65,11 
+65,11 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB0_2: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v1 ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_mad_u32_u24 v0, v0, 5, s4 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -92,11 +92,11 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB0_2: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v1 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: v_mad_u32_u24 v0, v0, 5, s4 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -253,8 +253,8 @@ define amdgpu_kernel void @add_i32_uniform(ptr addrspace(1) %out, i32 %additive) ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v1 +; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: v_mul_lo_u32 v0, s6, v0 ; GFX7LESS-NEXT: v_add_i32_e32 v0, vcc, s4, v0 ; GFX7LESS-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -504,11 +504,11 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB2_4: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s4, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -544,11 +544,11 @@ define amdgpu_kernel 
void @add_i32_varying(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB2_4: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: v_add_u32_e32 v0, s4, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -944,7 +944,6 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0 ; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v1 ; GFX7LESS-NEXT: v_mul_hi_u32_u24_e32 v1, 5, v2 @@ -952,6 +951,7 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: v_mov_b32_e32 v2, s5 ; GFX7LESS-NEXT: v_add_i32_e32 v0, vcc, s4, v0 ; GFX7LESS-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc +; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX7LESS-NEXT: s_endpgm ; @@ -974,7 +974,6 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB4_2: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s2, v0 ; GFX8-NEXT: v_readfirstlane_b32 s3, v1 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -1006,7 +1005,6 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB4_2: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s2, v0 ; GFX9-NEXT: v_readfirstlane_b32 s3, v1 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -1219,11 +1217,11 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, i64 %additive) ; 
GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB5_2: ; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: v_readfirstlane_b32 s5, v1 ; GFX8-NEXT: v_mov_b32_e32 v0, s4 ; GFX8-NEXT: v_mov_b32_e32 v1, s5 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mul_lo_u32 v3, s3, v2 ; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[2:3], s2, v2, v[0:1] ; GFX8-NEXT: s_mov_b32 s7, 0xf000 @@ -1258,11 +1256,11 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, i64 %additive) ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB5_2: ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: v_readfirstlane_b32 s5, v1 ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s2, v2, v[0:1] ; GFX9-NEXT: s_mov_b32 s7, 0xf000 ; GFX9-NEXT: s_mov_b32 s6, -1 @@ -1530,10 +1528,10 @@ define amdgpu_kernel void @sub_i32_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v1 ; GFX7LESS-NEXT: v_mul_u32_u24_e32 v0, 5, v0 ; GFX7LESS-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 +; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7LESS-NEXT: s_endpgm ; @@ -1557,12 +1555,12 @@ define amdgpu_kernel void @sub_i32_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB7_2: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v1 ; GFX8-NEXT: v_mul_u32_u24_e32 v0, 5, v0 ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s4, v0 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, 
off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -1585,12 +1583,12 @@ define amdgpu_kernel void @sub_i32_constant(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB7_2: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v1 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, 5, v0 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: v_sub_u32_e32 v0, s4, v0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -1751,8 +1749,8 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, i32 %subitive) ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v1 +; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: v_mul_lo_u32 v0, s6, v0 ; GFX7LESS-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 ; GFX7LESS-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -2006,11 +2004,11 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB9_4: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s4, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -2046,11 +2044,11 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB9_4: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: v_sub_u32_e32 v0, s4, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: 
buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -2446,7 +2444,6 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0 ; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v1 ; GFX7LESS-NEXT: v_mul_hi_u32_u24_e32 v1, 5, v2 @@ -2454,6 +2451,7 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: v_mov_b32_e32 v2, s5 ; GFX7LESS-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 ; GFX7LESS-NEXT: v_subb_u32_e32 v1, vcc, v2, v1, vcc +; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX7LESS-NEXT: s_endpgm ; @@ -2477,7 +2475,6 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB11_2: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: v_readfirstlane_b32 s5, v1 ; GFX8-NEXT: v_mul_u32_u24_e32 v0, 5, v2 @@ -2487,6 +2484,7 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v2, v1, vcc +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -2509,7 +2507,6 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB11_2: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: v_readfirstlane_b32 s5, v1 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, 5, v2 @@ -2519,6 +2516,7 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out) { ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 
s2, -1 ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v2, v1, vcc +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -3081,11 +3079,11 @@ define amdgpu_kernel void @and_i32_varying(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB14_4: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_and_b32_e32 v0, s4, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -3121,11 +3119,11 @@ define amdgpu_kernel void @and_i32_varying(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB14_4: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: v_and_b32_e32 v0, s4, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -3355,11 +3353,11 @@ define amdgpu_kernel void @or_i32_varying(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB15_4: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_or_b32_e32 v0, s4, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -3395,11 +3393,11 @@ define amdgpu_kernel void @or_i32_varying(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB15_4: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; 
GFX9-NEXT: v_or_b32_e32 v0, s4, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -3629,11 +3627,11 @@ define amdgpu_kernel void @xor_i32_varying(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB16_4: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_xor_b32_e32 v0, s4, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -3669,11 +3667,11 @@ define amdgpu_kernel void @xor_i32_varying(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB16_4: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: v_xor_b32_e32 v0, s4, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -3903,11 +3901,11 @@ define amdgpu_kernel void @max_i32_varying(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB17_4: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_max_i32_e32 v0, s4, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -3943,11 +3941,11 @@ define amdgpu_kernel void @max_i32_varying(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB17_4: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: v_max_i32_e32 v0, s4, 
v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -4151,7 +4149,6 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0 ; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v1 ; GFX7LESS-NEXT: v_bfrev_b32_e32 v1, 1 @@ -4162,6 +4159,7 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GFX7LESS-NEXT: v_mov_b32_e32 v2, s4 ; GFX7LESS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX7LESS-NEXT: s_endpgm ; @@ -4182,7 +4180,6 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB18_2: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: v_bfrev_b32_e32 v0, 1 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -4216,7 +4213,6 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB18_2: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: v_bfrev_b32_e32 v0, 1 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -4419,11 +4415,11 @@ define amdgpu_kernel void @min_i32_varying(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB19_4: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_min_i32_e32 v0, s4, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dword 
v0, off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -4459,11 +4455,11 @@ define amdgpu_kernel void @min_i32_varying(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB19_4: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: v_min_i32_e32 v0, s4, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -4667,7 +4663,6 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0 ; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v1 ; GFX7LESS-NEXT: v_bfrev_b32_e32 v1, -2 @@ -4678,6 +4673,7 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GFX7LESS-NEXT: v_mov_b32_e32 v2, s4 ; GFX7LESS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX7LESS-NEXT: s_endpgm ; @@ -4698,7 +4694,6 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB20_2: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: v_bfrev_b32_e32 v0, -2 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -4732,7 +4727,6 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB20_2: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: v_bfrev_b32_e32 v0, -2 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -4935,11 +4929,11 @@ 
define amdgpu_kernel void @umax_i32_varying(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB21_4: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_max_u32_e32 v0, s4, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -4975,11 +4969,11 @@ define amdgpu_kernel void @umax_i32_varying(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB21_4: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: v_max_u32_e32 v0, s4, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -5183,7 +5177,6 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0 ; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v1 ; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0 @@ -5193,6 +5186,7 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GFX7LESS-NEXT: v_mov_b32_e32 v1, s5 ; GFX7LESS-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX7LESS-NEXT: s_endpgm ; @@ -5214,7 +5208,6 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB22_2: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: 
v_readfirstlane_b32 s5, v1 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 @@ -5226,6 +5219,7 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -5246,7 +5240,6 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB22_2: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: v_readfirstlane_b32 s5, v1 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 @@ -5258,6 +5251,7 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) { ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -5446,11 +5440,11 @@ define amdgpu_kernel void @umin_i32_varying(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB23_4: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_min_u32_e32 v0, s4, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -5486,11 +5480,11 @@ define amdgpu_kernel void @umin_i32_varying(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB23_4: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: v_min_u32_e32 v0, s4, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: 
buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -5694,7 +5688,6 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0 ; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v1 ; GFX7LESS-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc @@ -5704,6 +5697,7 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GFX7LESS-NEXT: v_mov_b32_e32 v2, s4 ; GFX7LESS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX7LESS-NEXT: s_endpgm ; @@ -5725,7 +5719,6 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB24_2: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: v_readfirstlane_b32 s5, v1 ; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc @@ -5737,6 +5730,7 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -5757,7 +5751,6 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB24_2: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: v_readfirstlane_b32 s5, v1 ; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc @@ -5769,6 +5762,7 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) { ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; 
GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll index 19a1d2d9..c9076a9 100644 --- a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll +++ b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll @@ -186,7 +186,7 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) #0 { ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: .LBB1_8: ; %atomicrmw.phi ; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: v_mov_b32_e32 v0, v3 ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll index 9865883..bf4302c 100644 --- a/llvm/test/CodeGen/AMDGPU/bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/bf16.ll @@ -5678,22 +5678,18 @@ define { <32 x i32>, bfloat } @test_overflow_stack(bfloat %a, <32 x i32> %b) { ; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8 ; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4 ; GFX11-NEXT: scratch_load_b32 v31, off, s32 -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: s_clause 0x4 -; GFX11-NEXT: scratch_store_b128 off, v[18:21], s0 offset:64 -; GFX11-NEXT: scratch_store_b128 off, v[10:13], s0 offset:32 -; GFX11-NEXT: scratch_store_b128 off, v[6:9], s0 offset:16 -; GFX11-NEXT: scratch_store_b128 off, v[2:5], s0 -; GFX11-NEXT: scratch_store_b16 off, v1, s0 offset:128 -; GFX11-NEXT: s_add_i32 s1, s0, 0x70 -; GFX11-NEXT: s_add_i32 s2, s0, 0x60 -; GFX11-NEXT: s_add_i32 s3, s0, 0x50 -; GFX11-NEXT: s_add_i32 s0, s0, 48 +; GFX11-NEXT: s_clause 0x5 +; GFX11-NEXT: scratch_store_b128 v0, v[22:25], off offset:80 +; GFX11-NEXT: scratch_store_b128 v0, v[18:21], off offset:64 +; GFX11-NEXT: scratch_store_b128 v0, v[14:17], off offset:48 +; GFX11-NEXT: scratch_store_b128 v0, 
v[10:13], off offset:32 +; GFX11-NEXT: scratch_store_b128 v0, v[6:9], off offset:16 +; GFX11-NEXT: scratch_store_b128 v0, v[2:5], off ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: scratch_store_b128 off, v[30:33], s1 -; GFX11-NEXT: scratch_store_b128 off, v[26:29], s2 -; GFX11-NEXT: scratch_store_b128 off, v[22:25], s3 -; GFX11-NEXT: scratch_store_b128 off, v[14:17], s0 +; GFX11-NEXT: s_clause 0x2 +; GFX11-NEXT: scratch_store_b128 v0, v[30:33], off offset:112 +; GFX11-NEXT: scratch_store_b128 v0, v[26:29], off offset:96 +; GFX11-NEXT: scratch_store_b16 v0, v1, off offset:128 ; GFX11-NEXT: s_setpc_b64 s[30:31] %ins.0 = insertvalue { <32 x i32>, bfloat } poison, <32 x i32> %b, 0 %ins.1 = insertvalue { <32 x i32>, bfloat } %ins.0 ,bfloat %a, 1 @@ -8827,19 +8823,6 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) { ; GFX11-NEXT: global_load_u16 v32, v[1:2], off offset:54 ; GFX11-NEXT: global_load_u16 v33, v[1:2], off offset:58 ; GFX11-NEXT: global_load_u16 v1, v[1:2], off offset:62 -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: s_add_i32 s1, s0, 0xf0 -; GFX11-NEXT: s_add_i32 s2, s0, 0xe0 -; GFX11-NEXT: s_add_i32 s3, s0, 0xd0 -; GFX11-NEXT: s_add_i32 s4, s0, 0xc0 -; GFX11-NEXT: s_add_i32 s5, s0, 0xb0 -; GFX11-NEXT: s_add_i32 s6, s0, 0xa0 -; GFX11-NEXT: s_add_i32 s7, s0, 0x90 -; GFX11-NEXT: s_add_i32 s8, s0, 0x70 -; GFX11-NEXT: s_add_i32 s9, s0, 0x60 -; GFX11-NEXT: s_add_i32 s10, s0, 0x50 -; GFX11-NEXT: s_add_i32 s11, s0, 48 ; GFX11-NEXT: s_waitcnt vmcnt(31) ; GFX11-NEXT: v_lshlrev_b32_e32 v39, 16, v3 ; GFX11-NEXT: s_waitcnt vmcnt(30) @@ -8936,23 +8919,23 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) { ; GFX11-NEXT: v_cvt_f64_f32_e32 v[5:6], v5 ; GFX11-NEXT: v_cvt_f64_f32_e32 v[3:4], v2 ; GFX11-NEXT: v_cvt_f64_f32_e32 v[1:2], v37 -; GFX11-NEXT: scratch_store_b128 off, v[96:99], s1 -; GFX11-NEXT: scratch_store_b128 off, v[84:87], s2 -; GFX11-NEXT: 
scratch_store_b128 off, v[80:83], s3 -; GFX11-NEXT: scratch_store_b128 off, v[68:71], s4 -; GFX11-NEXT: scratch_store_b128 off, v[64:67], s5 -; GFX11-NEXT: scratch_store_b128 off, v[52:55], s6 -; GFX11-NEXT: scratch_store_b128 off, v[48:51], s7 -; GFX11-NEXT: scratch_store_b128 off, v[33:36], s0 offset:128 -; GFX11-NEXT: scratch_store_b128 off, v[29:32], s8 -; GFX11-NEXT: scratch_store_b128 off, v[25:28], s9 -; GFX11-NEXT: scratch_store_b128 off, v[21:24], s10 -; GFX11-NEXT: scratch_store_b128 off, v[17:20], s0 offset:64 -; GFX11-NEXT: scratch_store_b128 off, v[13:16], s11 -; GFX11-NEXT: s_clause 0x2 -; GFX11-NEXT: scratch_store_b128 off, v[9:12], s0 offset:32 -; GFX11-NEXT: scratch_store_b128 off, v[5:8], s0 offset:16 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 +; GFX11-NEXT: s_clause 0xf +; GFX11-NEXT: scratch_store_b128 v0, v[96:99], off offset:240 +; GFX11-NEXT: scratch_store_b128 v0, v[84:87], off offset:224 +; GFX11-NEXT: scratch_store_b128 v0, v[80:83], off offset:208 +; GFX11-NEXT: scratch_store_b128 v0, v[68:71], off offset:192 +; GFX11-NEXT: scratch_store_b128 v0, v[64:67], off offset:176 +; GFX11-NEXT: scratch_store_b128 v0, v[52:55], off offset:160 +; GFX11-NEXT: scratch_store_b128 v0, v[48:51], off offset:144 +; GFX11-NEXT: scratch_store_b128 v0, v[33:36], off offset:128 +; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off offset:112 +; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:96 +; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:80 +; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:64 +; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:48 +; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:32 +; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:16 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off ; GFX11-NEXT: s_setpc_b64 s[30:31] %load = load <32 x bfloat>, ptr addrspace(1) %ptr %fpext = fpext <32 x bfloat> %load to <32 x double> diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll 
b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll index ac50fb8..da609bf 100644 --- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll +++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll @@ -41,7 +41,7 @@ define amdgpu_kernel void @test_sink_small_offset_global_atomic_fadd_f32(ptr add ; GCN-NEXT: .LBB0_2: ; %endif ; GCN-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-NEXT: v_mov_b32_e32 v1, 0x3d0000 -; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: global_store_dword v1, v0, s[0:1] offset:2300 ; GCN-NEXT: s_endpgm entry: diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll index 069c57e..6dabd8c 100644 --- a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll +++ b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll @@ -103,7 +103,6 @@ define <4 x i16> @vec_8xi16_extract_4xi16(ptr addrspace(1) %p0, ptr addrspace(1) ; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: .LBB0_4: ; %exit -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 15, v3 op_sel_hi:[0,0] ; GFX9-NEXT: s_movk_i32 s4, 0x8000 ; GFX9-NEXT: v_or_b32_e32 v1, 0xffff8000, v0 @@ -131,7 +130,6 @@ define <4 x i16> @vec_8xi16_extract_4xi16(ptr addrspace(1) %p0, ptr addrspace(1) ; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: .LBB0_4: ; %exit -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_pk_ashrrev_i16 v0, 15, v2 op_sel_hi:[0,1] ; GFX11-NEXT: v_pk_ashrrev_i16 v1, 15, v3 op_sel_hi:[0,0] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -266,7 +264,6 @@ define <4 x i16> @vec_8xi16_extract_4xi16_2(ptr addrspace(1) %p0, ptr addrspace( ; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: .LBB1_4: ; %exit -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 15, v5 
op_sel_hi:[0,1] ; GFX9-NEXT: s_movk_i32 s4, 0x8000 ; GFX9-NEXT: v_or_b32_e32 v1, 0xffff8000, v0 @@ -294,7 +291,6 @@ define <4 x i16> @vec_8xi16_extract_4xi16_2(ptr addrspace(1) %p0, ptr addrspace( ; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: .LBB1_4: ; %exit -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_pk_ashrrev_i16 v0, 15, v4 op_sel_hi:[0,1] ; GFX11-NEXT: v_pk_ashrrev_i16 v1, 15, v5 op_sel_hi:[0,1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -431,7 +427,6 @@ define <4 x half> @vec_8xf16_extract_4xf16(ptr addrspace(1) %p0, ptr addrspace(1 ; GFX9-NEXT: .LBB2_4: ; %exit ; GFX9-NEXT: v_mov_b32_e32 v0, 0x3900 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x3d00 -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_cmp_ge_f16_e32 vcc, 0.5, v2 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x3800 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc @@ -461,7 +456,6 @@ define <4 x half> @vec_8xf16_extract_4xf16(ptr addrspace(1) %p0, ptr addrspace(1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: .LBB2_4: ; %exit ; GFX11-NEXT: v_mov_b32_e32 v0, 0x3d00 -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v2 ; GFX11-NEXT: v_cmp_ge_f16_e32 vcc_lo, 0.5, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) @@ -665,7 +659,6 @@ define <4 x i16> @vec_16xi16_extract_4xi16(ptr addrspace(1) %p0, ptr addrspace(1 ; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: .LBB3_4: ; %exit -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_pk_ashrrev_i16 v0, 15, v2 op_sel_hi:[0,1] ; GFX11-NEXT: v_pk_ashrrev_i16 v1, 15, v3 op_sel_hi:[0,0] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -871,7 +864,6 @@ define <4 x i16> @vec_16xi16_extract_4xi16_2(ptr addrspace(1) %p0, ptr addrspace ; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; 
GFX11-NEXT: .LBB4_4: ; %exit -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_pk_ashrrev_i16 v0, 15, v4 op_sel_hi:[0,1] ; GFX11-NEXT: v_pk_ashrrev_i16 v1, 15, v5 op_sel_hi:[0,1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -1081,7 +1073,6 @@ define <4 x half> @vec_16xf16_extract_4xf16(ptr addrspace(1) %p0, ptr addrspace( ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: .LBB5_4: ; %exit ; GFX11-NEXT: v_mov_b32_e32 v0, 0x3d00 -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v2 ; GFX11-NEXT: v_cmp_ge_f16_e32 vcc_lo, 0.5, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) @@ -1432,7 +1423,6 @@ define amdgpu_gfx <8 x i16> @vec_16xi16_extract_8xi16_0(i1 inreg %cond, ptr addr ; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: .LBB7_4: ; %exit -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x3801, v5 ; GFX11-NEXT: v_mov_b32_e32 v9, 0x3900 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x3d00 @@ -1724,7 +1714,6 @@ define amdgpu_gfx <8 x half> @vec_16xf16_extract_8xf16_0(i1 inreg %cond, ptr add ; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: .LBB8_4: ; %exit -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_cmp_ge_f16_e32 vcc_lo, 0.5, v5 ; GFX11-NEXT: v_mov_b32_e32 v9, 0x3900 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x3d00 diff --git a/llvm/test/CodeGen/AMDGPU/function-args.ll b/llvm/test/CodeGen/AMDGPU/function-args.ll index db89ad6..3b2f15c 100644 --- a/llvm/test/CodeGen/AMDGPU/function-args.ll +++ b/llvm/test/CodeGen/AMDGPU/function-args.ll @@ -114,7 +114,6 @@ define void @i1_arg_i1_use(i1 %arg) #0 { ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: .LBB3_2: ; %bb2 ; CIGFX89-NEXT: s_or_b64 exec, exec, s[4:5] -; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: i1_arg_i1_use: diff --git 
a/llvm/test/CodeGen/AMDGPU/function-returns.ll b/llvm/test/CodeGen/AMDGPU/function-returns.ll index acadee2..401cbce 100644 --- a/llvm/test/CodeGen/AMDGPU/function-returns.ll +++ b/llvm/test/CodeGen/AMDGPU/function-returns.ll @@ -1561,34 +1561,28 @@ define <33 x i32> @v33i32_func_void() #0 { ; GFX11-NEXT: buffer_load_b128 v[9:12], off, s[0:3], 0 offset:80 ; GFX11-NEXT: buffer_load_b128 v[13:16], off, s[0:3], 0 offset:64 ; GFX11-NEXT: buffer_load_b128 v[17:20], off, s[0:3], 0 offset:48 -; GFX11-NEXT: buffer_load_b128 v[21:24], off, s[0:3], 0 offset:16 -; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0 -; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0 offset:32 +; GFX11-NEXT: buffer_load_b128 v[21:24], off, s[0:3], 0 offset:32 +; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0 offset:16 +; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0 ; GFX11-NEXT: buffer_load_b32 v33, off, s[0:3], 0 offset:128 -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: s_add_i32 s1, s0, 0x70 -; GFX11-NEXT: s_add_i32 s2, s0, 0x60 -; GFX11-NEXT: s_add_i32 s3, s0, 0x50 -; GFX11-NEXT: s_add_i32 s4, s0, 48 ; GFX11-NEXT: s_waitcnt vmcnt(8) -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:112 ; GFX11-NEXT: s_waitcnt vmcnt(7) -; GFX11-NEXT: scratch_store_b128 off, v[5:8], s2 +; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:96 ; GFX11-NEXT: s_waitcnt vmcnt(6) -; GFX11-NEXT: scratch_store_b128 off, v[9:12], s3 +; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:80 ; GFX11-NEXT: s_waitcnt vmcnt(5) -; GFX11-NEXT: scratch_store_b128 off, v[13:16], s0 offset:64 +; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:64 ; GFX11-NEXT: s_waitcnt vmcnt(4) -; GFX11-NEXT: scratch_store_b128 off, v[17:20], s4 +; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:48 ; GFX11-NEXT: s_waitcnt vmcnt(3) -; GFX11-NEXT: scratch_store_b128 off, v[21:24], s0 
offset:16 +; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:32 ; GFX11-NEXT: s_waitcnt vmcnt(2) -; GFX11-NEXT: scratch_store_b128 off, v[25:28], s0 +; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:16 ; GFX11-NEXT: s_waitcnt vmcnt(1) -; GFX11-NEXT: scratch_store_b128 off, v[29:32], s0 offset:32 +; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: scratch_store_b32 off, v33, s0 offset:128 +; GFX11-NEXT: scratch_store_b32 v0, v33, off offset:128 ; GFX11-NEXT: s_setpc_b64 s[30:31] %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <33 x i32>, ptr addrspace(1) %ptr @@ -1850,34 +1844,28 @@ define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 { ; GFX11-NEXT: buffer_load_b128 v[9:12], off, s[0:3], 0 offset:80 ; GFX11-NEXT: buffer_load_b128 v[13:16], off, s[0:3], 0 offset:64 ; GFX11-NEXT: buffer_load_b128 v[17:20], off, s[0:3], 0 offset:48 -; GFX11-NEXT: buffer_load_b128 v[21:24], off, s[0:3], 0 offset:16 -; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0 -; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0 offset:32 +; GFX11-NEXT: buffer_load_b128 v[21:24], off, s[0:3], 0 offset:32 +; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0 offset:16 +; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0 ; GFX11-NEXT: buffer_load_b32 v33, off, s[0:3], 0 offset:128 -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: s_add_i32 s1, s0, 0x70 -; GFX11-NEXT: s_add_i32 s2, s0, 0x60 -; GFX11-NEXT: s_add_i32 s3, s0, 0x50 -; GFX11-NEXT: s_add_i32 s4, s0, 48 ; GFX11-NEXT: s_waitcnt vmcnt(8) -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:112 ; GFX11-NEXT: s_waitcnt vmcnt(7) -; GFX11-NEXT: scratch_store_b128 off, v[5:8], s2 +; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:96 ; GFX11-NEXT: s_waitcnt vmcnt(6) -; GFX11-NEXT: scratch_store_b128 off, v[9:12], s3 +; 
GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:80 ; GFX11-NEXT: s_waitcnt vmcnt(5) -; GFX11-NEXT: scratch_store_b128 off, v[13:16], s0 offset:64 +; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:64 ; GFX11-NEXT: s_waitcnt vmcnt(4) -; GFX11-NEXT: scratch_store_b128 off, v[17:20], s4 +; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:48 ; GFX11-NEXT: s_waitcnt vmcnt(3) -; GFX11-NEXT: scratch_store_b128 off, v[21:24], s0 offset:16 +; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:32 ; GFX11-NEXT: s_waitcnt vmcnt(2) -; GFX11-NEXT: scratch_store_b128 off, v[25:28], s0 +; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:16 ; GFX11-NEXT: s_waitcnt vmcnt(1) -; GFX11-NEXT: scratch_store_b128 off, v[29:32], s0 offset:32 +; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: scratch_store_b32 off, v33, s0 offset:128 +; GFX11-NEXT: scratch_store_b32 v0, v33, off offset:128 ; GFX11-NEXT: s_setpc_b64 s[30:31] %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load { <32 x i32>, i32 }, ptr addrspace(1) %ptr @@ -2143,33 +2131,24 @@ define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 { ; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0 offset:144 ; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0 offset:128 ; GFX11-NEXT: buffer_load_b32 v33, off, s[0:3], 0 -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: s_add_i32 s1, s0, 0xf0 -; GFX11-NEXT: s_add_i32 s2, s0, 0xe0 -; GFX11-NEXT: s_add_i32 s3, s0, 0xd0 -; GFX11-NEXT: s_add_i32 s4, s0, 0xc0 -; GFX11-NEXT: s_add_i32 s5, s0, 0xb0 -; GFX11-NEXT: s_add_i32 s6, s0, 0xa0 -; GFX11-NEXT: s_add_i32 s7, s0, 0x90 ; GFX11-NEXT: s_waitcnt vmcnt(8) -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:240 ; GFX11-NEXT: s_waitcnt vmcnt(7) -; GFX11-NEXT: scratch_store_b128 off, v[5:8], s2 +; GFX11-NEXT: scratch_store_b128 
v0, v[5:8], off offset:224 ; GFX11-NEXT: s_waitcnt vmcnt(6) -; GFX11-NEXT: scratch_store_b128 off, v[9:12], s3 +; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:208 ; GFX11-NEXT: s_waitcnt vmcnt(5) -; GFX11-NEXT: scratch_store_b128 off, v[13:16], s4 +; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:192 ; GFX11-NEXT: s_waitcnt vmcnt(4) -; GFX11-NEXT: scratch_store_b128 off, v[17:20], s5 +; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:176 ; GFX11-NEXT: s_waitcnt vmcnt(3) -; GFX11-NEXT: scratch_store_b128 off, v[21:24], s6 +; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:160 ; GFX11-NEXT: s_waitcnt vmcnt(2) -; GFX11-NEXT: scratch_store_b128 off, v[25:28], s7 +; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:144 ; GFX11-NEXT: s_waitcnt vmcnt(1) -; GFX11-NEXT: scratch_store_b128 off, v[29:32], s0 offset:128 +; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off offset:128 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: scratch_store_b32 off, v33, s0 +; GFX11-NEXT: scratch_store_b32 v0, v33, off ; GFX11-NEXT: s_setpc_b64 s[30:31] %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load { i32, <32 x i32> }, ptr addrspace(1) %ptr diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll index c1d6826..3b078c4 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll @@ -1989,256 +1989,138 @@ define amdgpu_gfx <512 x i32> @return_512xi32() #0 { ; GFX11-NEXT: s_mov_b32 s2, s0 ; GFX11-NEXT: v_dual_mov_b32 v4, s3 :: v_dual_mov_b32 v3, s2 ; GFX11-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: s_clause 0x7 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:1024 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:512 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:256 -; GFX11-NEXT: scratch_store_b128 off, 
v[1:4], s0 offset:128 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:64 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:32 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:16 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 -; GFX11-NEXT: s_add_i32 s1, s0, 0x7f0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x7e0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x7d0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x7c0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x7b0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x7a0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x790 -; GFX11-NEXT: s_add_i32 s2, s0, 0x780 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x770 -; GFX11-NEXT: s_add_i32 s2, s0, 0x760 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x750 -; GFX11-NEXT: s_add_i32 s2, s0, 0x740 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x730 -; GFX11-NEXT: s_add_i32 s2, s0, 0x720 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x710 -; GFX11-NEXT: s_add_i32 s2, s0, 0x700 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x6f0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x6e0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x6d0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x6c0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: 
scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x6b0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x6a0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x690 -; GFX11-NEXT: s_add_i32 s2, s0, 0x680 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x670 -; GFX11-NEXT: s_add_i32 s2, s0, 0x660 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x650 -; GFX11-NEXT: s_add_i32 s2, s0, 0x640 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x630 -; GFX11-NEXT: s_add_i32 s2, s0, 0x620 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x610 -; GFX11-NEXT: s_add_i32 s2, s0, 0x600 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x5f0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x5e0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x5d0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x5c0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x5b0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x5a0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x590 -; GFX11-NEXT: s_add_i32 s2, s0, 0x580 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x570 -; GFX11-NEXT: s_add_i32 s2, s0, 0x560 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; 
GFX11-NEXT: s_add_i32 s1, s0, 0x550 -; GFX11-NEXT: s_add_i32 s2, s0, 0x540 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x530 -; GFX11-NEXT: s_add_i32 s2, s0, 0x520 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x510 -; GFX11-NEXT: s_add_i32 s2, s0, 0x500 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x4f0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x4e0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x4d0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x4c0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x4b0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x4a0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x490 -; GFX11-NEXT: s_add_i32 s2, s0, 0x480 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x470 -; GFX11-NEXT: s_add_i32 s2, s0, 0x460 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x450 -; GFX11-NEXT: s_add_i32 s2, s0, 0x440 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x430 -; GFX11-NEXT: s_add_i32 s2, s0, 0x420 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x410 -; GFX11-NEXT: s_add_i32 s2, s0, 0x3f0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x3e0 -; 
GFX11-NEXT: s_add_i32 s2, s0, 0x3d0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x3c0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x3b0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x3a0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x390 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x380 -; GFX11-NEXT: s_add_i32 s2, s0, 0x370 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x360 -; GFX11-NEXT: s_add_i32 s2, s0, 0x350 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x340 -; GFX11-NEXT: s_add_i32 s2, s0, 0x330 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x320 -; GFX11-NEXT: s_add_i32 s2, s0, 0x310 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x300 -; GFX11-NEXT: s_add_i32 s2, s0, 0x2f0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x2e0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x2d0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x2c0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x2b0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x2a0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x290 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x280 -; GFX11-NEXT: s_add_i32 s2, s0, 0x270 -; 
GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x260 -; GFX11-NEXT: s_add_i32 s2, s0, 0x250 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x240 -; GFX11-NEXT: s_add_i32 s2, s0, 0x230 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x220 -; GFX11-NEXT: s_add_i32 s2, s0, 0x210 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x1f0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x1e0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x1d0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x1c0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x1b0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x1a0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x190 -; GFX11-NEXT: s_add_i32 s2, s0, 0x180 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x170 -; GFX11-NEXT: s_add_i32 s2, s0, 0x160 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x150 -; GFX11-NEXT: s_add_i32 s2, s0, 0x140 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x130 -; GFX11-NEXT: s_add_i32 s2, s0, 0x120 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x110 -; GFX11-NEXT: s_add_i32 s2, s0, 0xf0 -; GFX11-NEXT: scratch_store_b128 off, 
v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0xe0 -; GFX11-NEXT: s_add_i32 s2, s0, 0xd0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0xc0 -; GFX11-NEXT: s_add_i32 s2, s0, 0xb0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0xa0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x90 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x70 -; GFX11-NEXT: s_add_i32 s2, s0, 0x60 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x50 -; GFX11-NEXT: s_add_i32 s0, s0, 48 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:2032 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:2016 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:2000 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1984 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1968 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1952 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1936 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1920 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1904 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1888 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1872 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1856 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1840 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1824 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1808 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1792 +; GFX11-NEXT: scratch_store_b128 v0, 
v[1:4], off offset:1776 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1760 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1744 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1728 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1712 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1696 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1680 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1664 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1648 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1632 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1616 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1600 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1584 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1568 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1552 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1536 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1520 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1504 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1488 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1472 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1456 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1440 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1424 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1408 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1392 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1376 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1360 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1344 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1328 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1312 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1296 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1280 +; GFX11-NEXT: 
scratch_store_b128 v0, v[1:4], off offset:1264 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1248 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1232 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1216 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1200 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1184 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1168 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1152 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1136 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1120 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1104 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1088 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1072 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1056 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1040 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1024 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1008 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:992 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:976 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:960 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:944 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:928 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:912 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:896 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:880 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:864 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:848 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:832 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:816 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:800 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:784 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:768 +; GFX11-NEXT: 
scratch_store_b128 v0, v[1:4], off offset:752 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:736 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:720 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:704 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:688 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:672 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:656 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:640 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:624 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:608 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:592 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:576 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:560 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:544 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:528 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:512 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:496 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:480 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:464 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:448 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:432 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:416 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:400 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:384 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:368 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:352 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:336 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:320 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:304 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:288 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:272 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:256 +; GFX11-NEXT: scratch_store_b128 
v0, v[1:4], off offset:240 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:224 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:208 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:192 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:176 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:160 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:144 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:128 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:112 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:96 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:80 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:64 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:48 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:32 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:16 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: ret <512 x i32> zeroinitializer @@ -2636,7 +2518,6 @@ define amdgpu_gfx <72 x i32> @return_72xi32(<72 x i32> %val) #1 { ; GFX11-LABEL: return_72xi32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: s_clause 0xc ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:212 ; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:208 @@ -2651,93 +2532,82 @@ define amdgpu_gfx <72 x i32> @return_72xi32(<72 x i32> %val) #1 { ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:172 ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:168 ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:164 -; GFX11-NEXT: s_clause 0x14 -; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:32 -; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:28 -; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:24 -; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:48 -; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:44 -; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:40 
-; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:64 -; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:60 -; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:56 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:80 -; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:76 -; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:72 -; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:96 -; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:92 -; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:88 -; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:112 -; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:108 -; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:104 -; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:128 -; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:124 -; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:120 -; GFX11-NEXT: scratch_store_b128 off, v[17:20], s0 offset:64 +; GFX11-NEXT: s_clause 0x11 +; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:16 +; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:12 +; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:8 +; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:32 +; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:28 +; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:24 +; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:48 +; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:44 +; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:40 +; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:64 +; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:60 +; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:56 +; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:80 +; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:76 +; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:72 +; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:96 +; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:92 +; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:88 +; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off 
offset:80 +; GFX11-NEXT: s_clause 0x2 +; GFX11-NEXT: scratch_load_b32 v23, off, s32 offset:112 +; GFX11-NEXT: scratch_load_b32 v22, off, s32 offset:108 +; GFX11-NEXT: scratch_load_b32 v21, off, s32 offset:104 +; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:64 ; GFX11-NEXT: s_clause 0x2 -; GFX11-NEXT: scratch_load_b32 v20, off, s32 offset:144 -; GFX11-NEXT: scratch_load_b32 v19, off, s32 offset:140 -; GFX11-NEXT: scratch_load_b32 v18, off, s32 offset:136 -; GFX11-NEXT: scratch_store_b128 off, v[9:12], s0 offset:32 +; GFX11-NEXT: scratch_load_b32 v19, off, s32 offset:128 +; GFX11-NEXT: scratch_load_b32 v18, off, s32 offset:124 +; GFX11-NEXT: scratch_load_b32 v17, off, s32 offset:120 +; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:48 ; GFX11-NEXT: s_clause 0x2 -; GFX11-NEXT: scratch_load_b32 v12, off, s32 offset:160 -; GFX11-NEXT: scratch_load_b32 v11, off, s32 offset:156 -; GFX11-NEXT: scratch_load_b32 v10, off, s32 offset:152 -; GFX11-NEXT: scratch_store_b128 off, v[5:8], s0 offset:16 +; GFX11-NEXT: scratch_load_b32 v15, off, s32 offset:144 +; GFX11-NEXT: scratch_load_b32 v14, off, s32 offset:140 +; GFX11-NEXT: scratch_load_b32 v13, off, s32 offset:136 +; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:32 ; GFX11-NEXT: s_clause 0xd -; GFX11-NEXT: scratch_load_b32 v8, off, s32 offset:16 -; GFX11-NEXT: scratch_load_b32 v7, off, s32 offset:12 -; GFX11-NEXT: scratch_load_b32 v6, off, s32 offset:8 -; GFX11-NEXT: scratch_load_b32 v5, off, s32 offset:4 -; GFX11-NEXT: scratch_load_b32 v9, off, s32 offset:148 -; GFX11-NEXT: scratch_load_b32 v17, off, s32 offset:132 -; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:116 -; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:100 -; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:84 -; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:68 -; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:52 -; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:36 -; GFX11-NEXT: scratch_load_b32 v33, off, s32 
offset:20 +; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:160 +; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:156 +; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:152 +; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:148 +; GFX11-NEXT: scratch_load_b32 v12, off, s32 offset:132 +; GFX11-NEXT: scratch_load_b32 v16, off, s32 offset:116 +; GFX11-NEXT: scratch_load_b32 v20, off, s32 offset:100 +; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:84 +; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:68 +; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:52 +; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:36 +; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:20 +; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:4 ; GFX11-NEXT: scratch_load_b32 v32, off, s32 -; GFX11-NEXT: s_add_i32 s1, s0, 0x110 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x100 -; GFX11-NEXT: s_add_i32 s3, s0, 0xf0 -; GFX11-NEXT: s_add_i32 s34, s0, 0xe0 -; GFX11-NEXT: s_add_i32 s35, s0, 0xd0 -; GFX11-NEXT: s_add_i32 s36, s0, 0xc0 -; GFX11-NEXT: s_add_i32 s37, s0, 0xb0 -; GFX11-NEXT: s_add_i32 s38, s0, 0xa0 -; GFX11-NEXT: s_add_i32 s39, s0, 0x90 -; GFX11-NEXT: s_add_i32 s40, s0, 0x70 -; GFX11-NEXT: s_add_i32 s41, s0, 0x60 -; GFX11-NEXT: s_add_i32 s42, s0, 0x50 -; GFX11-NEXT: s_add_i32 s43, s0, 48 ; GFX11-NEXT: s_waitcnt vmcnt(10) -; GFX11-NEXT: scratch_store_b128 off, v[5:8], s0 offset:128 +; GFX11-NEXT: scratch_store_b128 v0, v[60:63], off offset:272 ; GFX11-NEXT: s_waitcnt vmcnt(9) -; GFX11-NEXT: scratch_store_b128 off, v[9:12], s1 +; GFX11-NEXT: scratch_store_b128 v0, v[12:15], off offset:256 ; GFX11-NEXT: s_waitcnt vmcnt(8) -; GFX11-NEXT: scratch_store_b128 off, v[17:20], s2 +; GFX11-NEXT: scratch_store_b128 v0, v[16:19], off offset:240 ; GFX11-NEXT: s_waitcnt vmcnt(7) -; GFX11-NEXT: scratch_store_b128 off, v[60:63], s3 +; GFX11-NEXT: scratch_store_b128 v0, v[20:23], off offset:224 ; GFX11-NEXT: s_waitcnt vmcnt(6) -; GFX11-NEXT: 
scratch_store_b128 off, v[56:59], s34 +; GFX11-NEXT: scratch_store_b128 v0, v[56:59], off offset:208 ; GFX11-NEXT: s_waitcnt vmcnt(5) -; GFX11-NEXT: scratch_store_b128 off, v[41:44], s35 +; GFX11-NEXT: scratch_store_b128 v0, v[41:44], off offset:192 ; GFX11-NEXT: s_waitcnt vmcnt(4) -; GFX11-NEXT: scratch_store_b128 off, v[37:40], s36 +; GFX11-NEXT: scratch_store_b128 v0, v[37:40], off offset:176 ; GFX11-NEXT: s_waitcnt vmcnt(3) -; GFX11-NEXT: scratch_store_b128 off, v[52:55], s37 +; GFX11-NEXT: scratch_store_b128 v0, v[52:55], off offset:160 ; GFX11-NEXT: s_waitcnt vmcnt(2) -; GFX11-NEXT: scratch_store_b128 off, v[48:51], s38 +; GFX11-NEXT: scratch_store_b128 v0, v[48:51], off offset:144 ; GFX11-NEXT: s_waitcnt vmcnt(1) -; GFX11-NEXT: scratch_store_b128 off, v[33:36], s39 +; GFX11-NEXT: scratch_store_b128 v0, v[33:36], off offset:128 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: scratch_store_b128 off, v[29:32], s40 -; GFX11-NEXT: scratch_store_b128 off, v[25:28], s41 -; GFX11-NEXT: scratch_store_b128 off, v[21:24], s42 -; GFX11-NEXT: scratch_store_b128 off, v[13:16], s43 +; GFX11-NEXT: s_clause 0x3 +; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off offset:112 +; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:96 +; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:16 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off ; GFX11-NEXT: s_clause 0xc ; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:164 ; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:168 @@ -3306,7 +3176,7 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-LABEL: call_72xi32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s46, s33 +; GFX11-NEXT: s_mov_b32 s34, s33 ; GFX11-NEXT: s_add_i32 s33, s32, 0x1ff ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_and_b32 s33, s33, 0xfffffe00 @@ -3353,11 +3223,11 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1 ; GFX11-NEXT: 
s_add_i32 s0, s32, 32 ; GFX11-NEXT: s_add_i32 s1, s32, 16 +; GFX11-NEXT: s_add_i32 s2, s33, 0x200 +; GFX11-NEXT: v_writelane_b32 v60, s30, 0 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1 -; GFX11-NEXT: s_add_i32 s0, s33, 0x200 -; GFX11-NEXT: v_writelane_b32 v60, s30, 0 -; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, 0 +; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, 0 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 ; GFX11-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v4, 0 ; GFX11-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_mov_b32 v6, 0 @@ -3373,14 +3243,14 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: v_dual_mov_b32 v27, 0 :: v_dual_mov_b32 v26, 0 ; GFX11-NEXT: v_dual_mov_b32 v29, 0 :: v_dual_mov_b32 v28, 0 ; GFX11-NEXT: v_dual_mov_b32 v31, 0 :: v_dual_mov_b32 v30, 0 -; GFX11-NEXT: s_mov_b32 s45, return_72xi32@abs32@hi -; GFX11-NEXT: s_mov_b32 s44, return_72xi32@abs32@lo +; GFX11-NEXT: s_mov_b32 s1, return_72xi32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, return_72xi32@abs32@lo ; GFX11-NEXT: v_writelane_b32 v60, s31, 1 -; GFX11-NEXT: s_swappc_b64 s[30:31], s[44:45] +; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: scratch_load_b128 v[45:48], off, s33 offset:624 ; GFX11-NEXT: scratch_load_b128 v[33:36], off, s33 offset:640 -; GFX11-NEXT: s_add_i32 s0, s32, 0xa0 +; GFX11-NEXT: s_add_i32 s2, s32, 0xa0 ; GFX11-NEXT: s_waitcnt vmcnt(1) ; GFX11-NEXT: v_mov_b32_e32 v32, v48 ; GFX11-NEXT: s_clause 0x9 @@ -3431,38 +3301,38 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: v_dual_mov_b32 v2, v5 :: v_dual_mov_b32 v3, v6 ; GFX11-NEXT: v_dual_mov_b32 v5, v8 :: v_dual_mov_b32 v6, v9 ; GFX11-NEXT: v_mov_b32_e32 v9, v20 -; GFX11-NEXT: scratch_store_b32 off, v11, s0 -; GFX11-NEXT: s_add_i32 s0, s32, 0x90 +; GFX11-NEXT: scratch_store_b32 off, v11, s2 +; GFX11-NEXT: s_add_i32 s2, s32, 0x90 ; GFX11-NEXT: v_mov_b32_e32 v11, v22 -; GFX11-NEXT: 
scratch_store_b128 off, v[4:7], s0 -; GFX11-NEXT: s_add_i32 s0, s32, 0x80 +; GFX11-NEXT: scratch_store_b128 off, v[4:7], s2 +; GFX11-NEXT: s_add_i32 s2, s32, 0x80 ; GFX11-NEXT: v_mov_b32_e32 v5, v16 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], s2 ; GFX11-NEXT: v_mov_b32_e32 v0, 24 -; GFX11-NEXT: s_add_i32 s0, s32, 0x70 +; GFX11-NEXT: s_add_i32 s2, s32, 0x70 ; GFX11-NEXT: v_mov_b32_e32 v6, v17 -; GFX11-NEXT: scratch_store_b128 off, v[12:15], s0 +; GFX11-NEXT: scratch_store_b128 off, v[12:15], s2 ; GFX11-NEXT: v_mov_b32_e32 v13, v24 -; GFX11-NEXT: s_add_i32 s0, s32, 0x6c +; GFX11-NEXT: s_add_i32 s2, s32, 0x6c ; GFX11-NEXT: v_mov_b32_e32 v7, v18 -; GFX11-NEXT: scratch_store_b32 off, v0, s0 -; GFX11-NEXT: s_add_i32 s0, s32, 0x60 +; GFX11-NEXT: scratch_store_b32 off, v0, s2 +; GFX11-NEXT: s_add_i32 s2, s32, 0x60 ; GFX11-NEXT: v_dual_mov_b32 v8, v19 :: v_dual_mov_b32 v15, v26 -; GFX11-NEXT: scratch_store_b96 off, v[56:58], s0 -; GFX11-NEXT: s_add_i32 s0, s32, 0x50 +; GFX11-NEXT: scratch_store_b96 off, v[56:58], s2 +; GFX11-NEXT: s_add_i32 s2, s32, 0x50 ; GFX11-NEXT: v_dual_mov_b32 v12, v23 :: v_dual_mov_b32 v29, v45 -; GFX11-NEXT: scratch_store_b128 off, v[40:43], s0 -; GFX11-NEXT: s_add_i32 s0, s32, 64 +; GFX11-NEXT: scratch_store_b128 off, v[40:43], s2 +; GFX11-NEXT: s_add_i32 s2, s32, 64 ; GFX11-NEXT: v_mov_b32_e32 v14, v25 -; GFX11-NEXT: scratch_store_b128 off, v[52:55], s0 -; GFX11-NEXT: s_add_i32 s0, s32, 48 +; GFX11-NEXT: scratch_store_b128 off, v[52:55], s2 +; GFX11-NEXT: s_add_i32 s2, s32, 48 ; GFX11-NEXT: v_mov_b32_e32 v16, v27 -; GFX11-NEXT: scratch_store_b128 off, v[36:39], s0 -; GFX11-NEXT: s_add_i32 s0, s32, 32 +; GFX11-NEXT: scratch_store_b128 off, v[36:39], s2 +; GFX11-NEXT: s_add_i32 s2, s32, 32 ; GFX11-NEXT: v_mov_b32_e32 v30, v46 -; GFX11-NEXT: scratch_store_b128 off, v[48:51], s0 -; GFX11-NEXT: s_add_i32 s0, s32, 16 -; GFX11-NEXT: scratch_store_b128 off, v[32:35], s0 +; GFX11-NEXT: scratch_store_b128 
off, v[48:51], s2 +; GFX11-NEXT: s_add_i32 s2, s32, 16 +; GFX11-NEXT: scratch_store_b128 off, v[32:35], s2 ; GFX11-NEXT: scratch_load_b128 v[1:4], off, s33 offset:1588 ; 16-byte Folded Reload ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v1, 42 @@ -3470,10 +3340,10 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: scratch_load_b128 v[17:20], off, s33 offset:1572 ; GFX11-NEXT: scratch_load_b128 v[21:24], off, s33 offset:1556 ; GFX11-NEXT: scratch_load_b128 v[25:28], off, s33 offset:1540 -; GFX11-NEXT: s_add_i32 s0, s33, 0x400 +; GFX11-NEXT: s_add_i32 s2, s33, 0x400 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: v_mov_b32_e32 v0, s0 -; GFX11-NEXT: s_swappc_b64 s[30:31], s[44:45] +; GFX11-NEXT: v_mov_b32_e32 v0, s2 +; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_clause 0xb ; GFX11-NEXT: scratch_load_b32 v59, off, s33 ; GFX11-NEXT: scratch_load_b32 v58, off, s33 offset:4 @@ -3493,7 +3363,7 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: scratch_load_b32 v60, off, s33 offset:1536 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_addk_i32 s32, 0xf600 -; GFX11-NEXT: s_mov_b32 s33, s46 +; GFX11-NEXT: s_mov_b32 s33, s34 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: diff --git a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll index 433a836..3b3e107 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll @@ -33,7 +33,7 @@ define void @func_use_lds_global() { ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-SDAG-NEXT: s_mov_b32 m0, -1 -; GFX8-SDAG-NEXT: s_mov_b64 s[4:5], 0 +; GFX8-SDAG-NEXT: s_mov_b64 s[4:5], 0xc8 ; GFX8-SDAG-NEXT: ds_write_b32 v0, v0 ; GFX8-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0) @@ -103,7 +103,7 @@ define void 
@func_use_lds_global_constexpr_cast() { ; GFX8-SDAG-LABEL: func_use_lds_global_constexpr_cast: ; GFX8-SDAG: ; %bb.0: ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-SDAG-NEXT: s_mov_b64 s[4:5], 0 +; GFX8-SDAG-NEXT: s_mov_b64 s[4:5], 0xc8 ; GFX8-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-SDAG-NEXT: s_trap 2 @@ -171,7 +171,7 @@ define void @func_uses_lds_multi(i1 %cond) { ; GFX8-SDAG-NEXT: s_cbranch_execz .LBB2_2 ; GFX8-SDAG-NEXT: ; %bb.1: ; %bb1 ; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 1 -; GFX8-SDAG-NEXT: s_mov_b64 s[6:7], 0 +; GFX8-SDAG-NEXT: s_mov_b64 s[6:7], 0xc8 ; GFX8-SDAG-NEXT: ds_write_b32 v0, v0 ; GFX8-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 ; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0) @@ -181,7 +181,7 @@ define void @func_uses_lds_multi(i1 %cond) { ; GFX8-SDAG-NEXT: s_cbranch_execz .LBB2_4 ; GFX8-SDAG-NEXT: ; %bb.3: ; %bb0 ; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 0 -; GFX8-SDAG-NEXT: s_mov_b64 s[6:7], 0 +; GFX8-SDAG-NEXT: s_mov_b64 s[6:7], 0xc8 ; GFX8-SDAG-NEXT: ds_write_b32 v0, v0 ; GFX8-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 ; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0) @@ -189,7 +189,7 @@ define void @func_uses_lds_multi(i1 %cond) { ; GFX8-SDAG-NEXT: .LBB2_4: ; %ret ; GFX8-SDAG-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 2 -; GFX8-SDAG-NEXT: s_mov_b64 s[4:5], 0 +; GFX8-SDAG-NEXT: s_mov_b64 s[4:5], 0xc8 ; GFX8-SDAG-NEXT: ds_write_b32 v0, v0 ; GFX8-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0) @@ -379,7 +379,7 @@ define void @func_uses_lds_code_after(ptr addrspace(1) %ptr) { ; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, 0 ; GFX8-SDAG-NEXT: s_mov_b32 m0, -1 ; GFX8-SDAG-NEXT: ds_write_b32 v0, v2 -; GFX8-SDAG-NEXT: s_mov_b64 s[4:5], 0 +; GFX8-SDAG-NEXT: s_mov_b64 s[4:5], 0xc8 ; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, 1 ; GFX8-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0) @@ -472,7 +472,7 @@ define i32 
@func_uses_lds_phi_after(i1 %cond, ptr addrspace(1) %ptr) { ; GFX8-SDAG-NEXT: ; %bb.1: ; %use.bb ; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-SDAG-NEXT: s_mov_b32 m0, -1 -; GFX8-SDAG-NEXT: s_mov_b64 s[6:7], 0 +; GFX8-SDAG-NEXT: s_mov_b64 s[6:7], 0xc8 ; GFX8-SDAG-NEXT: ds_write_b32 v0, v0 ; GFX8-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 ; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0) @@ -481,7 +481,6 @@ define i32 @func_uses_lds_phi_after(i1 %cond, ptr addrspace(1) %ptr) { ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX8-SDAG-NEXT: .LBB4_2: ; %ret ; GFX8-SDAG-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-GISEL-LABEL: func_uses_lds_phi_after: @@ -506,7 +505,7 @@ define i32 @func_uses_lds_phi_after(i1 %cond, ptr addrspace(1) %ptr) { ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX8-GISEL-NEXT: .LBB4_2: ; %ret ; GFX8-GISEL-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-SDAG-LABEL: func_uses_lds_phi_after: @@ -527,7 +526,7 @@ define i32 @func_uses_lds_phi_after(i1 %cond, ptr addrspace(1) %ptr) { ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX9-SDAG-NEXT: .LBB4_2: ; %ret ; GFX9-SDAG-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-GISEL-LABEL: func_uses_lds_phi_after: @@ -548,7 +547,7 @@ define i32 @func_uses_lds_phi_after(i1 %cond, ptr addrspace(1) %ptr) { ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX9-GISEL-NEXT: .LBB4_2: ; %ret ; GFX9-GISEL-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SDAG-LABEL: func_uses_lds_phi_after: @@ -570,7 +569,7 @@ define i32 @func_uses_lds_phi_after(i1 %cond, ptr addrspace(1) %ptr) { ; SDAG-NEXT: s_waitcnt vmcnt(0) ; 
SDAG-NEXT: .LBB4_3: ; %ret ; SDAG-NEXT: s_or_b64 exec, exec, s[4:5] -; SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SDAG-NEXT: s_setpc_b64 s[30:31] ; SDAG-NEXT: .LBB4_4: ; SDAG-NEXT: s_endpgm @@ -594,7 +593,7 @@ define i32 @func_uses_lds_phi_after(i1 %cond, ptr addrspace(1) %ptr) { ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: .LBB4_3: ; %ret ; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] -; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: s_setpc_b64 s[30:31] ; GISEL-NEXT: .LBB4_4: ; GISEL-NEXT: s_endpgm @@ -616,6 +615,3 @@ ret: ; CHECK: {{.*}} ; GFX8: {{.*}} ; GFX9: {{.*}} - -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/local-atomics-fp.ll b/llvm/test/CodeGen/AMDGPU/local-atomics-fp.ll index 5e76dfd..4477f02 100644 --- a/llvm/test/CodeGen/AMDGPU/local-atomics-fp.ll +++ b/llvm/test/CodeGen/AMDGPU/local-atomics-fp.ll @@ -157,7 +157,6 @@ define amdgpu_kernel void @lds_ds_fadd(ptr addrspace(1) %out, ptr addrspace(3) % ; VI-NEXT: .LBB2_2: ; VI-NEXT: s_or_b64 exec, exec, s[6:7] ; VI-NEXT: s_mov_b64 s[6:7], exec -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_readfirstlane_b32 s8, v1 ; VI-NEXT: v_mbcnt_lo_u32_b32 v1, s6, 0 ; VI-NEXT: v_mbcnt_hi_u32_b32 v1, s7, v1 @@ -203,15 +202,14 @@ define amdgpu_kernel void @lds_ds_fadd(ptr addrspace(1) %out, ptr addrspace(3) % ; VI-NEXT: ; %bb.7: ; VI-NEXT: v_mov_b32_e32 v2, s2 ; VI-NEXT: s_mov_b32 m0, -1 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_add_rtn_f32 v2, v2, v1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: .LBB2_8: ; VI-NEXT: s_or_b64 exec, exec, s[4:5] ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_readfirstlane_b32 s2, v2 ; VI-NEXT: v_add_f32_e32 v2, s2, v0 +; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: flat_store_dword v[0:1], v2 @@ -240,7 +238,6 @@ define amdgpu_kernel 
void @lds_ds_fadd(ptr addrspace(1) %out, ptr addrspace(3) % ; GFX9-NEXT: .LBB2_2: ; GFX9-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX9-NEXT: s_mov_b64 s[6:7], exec -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s8, v1 ; GFX9-NEXT: v_mbcnt_lo_u32_b32 v1, s6, 0 ; GFX9-NEXT: v_mbcnt_hi_u32_b32 v1, s7, v1 @@ -285,16 +282,15 @@ define amdgpu_kernel void @lds_ds_fadd(ptr addrspace(1) %out, ptr addrspace(3) % ; GFX9-NEXT: s_cbranch_execz .LBB2_8 ; GFX9-NEXT: ; %bb.7: ; GFX9-NEXT: v_mov_b32_e32 v2, s2 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_add_rtn_f32 v2, v2, v1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB2_8: ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s2, v2 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_add_f32_e32 v0, s2, v0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll index 138dd53..d19ef75 100644 --- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll +++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll @@ -1260,8 +1260,6 @@ define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 { ; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: .LBB11_5: ; %end -; GFX11-NEXT: s_nop 0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; GFX11-NEXT: .LBB11_6: ; GFX11-NEXT: s_mov_b64 exec, 0 @@ -1525,8 +1523,6 @@ define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, ; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: .LBB13_5: ; %UnifiedReturnBlock -; GFX11-NEXT: s_nop 0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; GFX11-NEXT: .LBB13_6: ; GFX11-NEXT: s_mov_b64 exec, 0 diff --git 
a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll index eef5f57..ecebbb9 100644 --- a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll +++ b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll @@ -32,7 +32,7 @@ define amdgpu_ps float @test_return_to_epilog_into_end_block(i32 inreg %a, float ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec ; GCN-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: S_WAITCNT_soft 3952 + ; GCN-NEXT: S_WAITCNT 3952 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.3: entry: @@ -79,7 +79,7 @@ define amdgpu_ps float @test_unify_return_to_epilog_into_end_block(i32 inreg %a, ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec ; GCN-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: S_WAITCNT_soft 3952 + ; GCN-NEXT: S_WAITCNT 3952 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.5: entry: diff --git a/llvm/test/CodeGen/Generic/allow-check.ll b/llvm/test/CodeGen/Generic/allow-check.ll index 43dab68..a084889 100644 --- a/llvm/test/CodeGen/Generic/allow-check.ll +++ b/llvm/test/CodeGen/Generic/allow-check.ll @@ -2,6 +2,7 @@ ; REQUIRES: host-byteorder-little-endian ; -global-isel=1 is unsupported. 
+; XFAIL: target=loongarch{{.*}} ; XFAIL: target=nvptx{{.*}} ; XFAIL: target=sparc{{.*}} ; XFAIL: target=hexagon-{{.*}} diff --git a/llvm/test/CodeGen/PowerPC/legalize-vaarg.ll b/llvm/test/CodeGen/PowerPC/legalize-vaarg.ll index b7f8b8a..8980049 100644 --- a/llvm/test/CodeGen/PowerPC/legalize-vaarg.ll +++ b/llvm/test/CodeGen/PowerPC/legalize-vaarg.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ;RUN: llc < %s --mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec | FileCheck %s -check-prefix=BE ;RUN: llc < %s --mtriple=powerpc64le-unknown-linux-gnu -mattr=+altivec | FileCheck %s -check-prefix=LE +;RUN: llc < %s --mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec -ppc-gather-alias-max-depth=0 | FileCheck %s -check-prefix=FORWARD define <8 x i32> @test_large_vec_vaarg(i32 %n, ...) { ; BE-LABEL: test_large_vec_vaarg: @@ -35,6 +36,22 @@ define <8 x i32> @test_large_vec_vaarg(i32 %n, ...) { ; LE-NEXT: lxvd2x 0, 0, 3 ; LE-NEXT: xxswapd 35, 0 ; LE-NEXT: blr +; +; FORWARD-LABEL: test_large_vec_vaarg: +; FORWARD: # %bb.0: +; FORWARD-NEXT: ld 3, -8(1) +; FORWARD-NEXT: addi 3, 3, 15 +; FORWARD-NEXT: rldicr 3, 3, 0, 59 +; FORWARD-NEXT: addi 4, 3, 16 +; FORWARD-NEXT: std 4, -8(1) +; FORWARD-NEXT: ld 4, -8(1) +; FORWARD-NEXT: lvx 2, 0, 3 +; FORWARD-NEXT: addi 4, 4, 15 +; FORWARD-NEXT: rldicr 3, 4, 0, 59 +; FORWARD-NEXT: addi 4, 3, 16 +; FORWARD-NEXT: std 4, -8(1) +; FORWARD-NEXT: lvx 3, 0, 3 +; FORWARD-NEXT: blr %args = alloca ptr, align 4 %x = va_arg ptr %args, <8 x i32> ret <8 x i32> %x diff --git a/llvm/test/CodeGen/PowerPC/sms-regpress.mir b/llvm/test/CodeGen/PowerPC/sms-regpress.mir index cebd78a..b01115c 100644 --- a/llvm/test/CodeGen/PowerPC/sms-regpress.mir +++ b/llvm/test/CodeGen/PowerPC/sms-regpress.mir @@ -1,41 +1,30 @@ -# RUN: llc --verify-machineinstrs -mcpu=pwr9 -o - %s -run-pass=pipeliner -ppc-enable-pipeliner -pipeliner-register-pressure -pipeliner-max-mii=50 -pipeliner-ii-search-range=30 -pipeliner-max-stages=10 
-debug-only=pipeliner 2>&1 | FileCheck %s +# RUN: llc --verify-machineinstrs -mcpu=pwr9 -o - %s -run-pass=pipeliner -ppc-enable-pipeliner -pipeliner-register-pressure -pipeliner-max-mii=50 -pipeliner-ii-search-range=30 -pipeliner-max-stages=10 -debug-only=pipeliner 2>&1 | FileCheck %s # REQUIRES: asserts # Check that if the register pressure is too high, the schedule is rejected, II is incremented, and scheduling continues. # The specific value of II is not important. -# CHECK: Try to schedule with 21 -# CHECK: Can't schedule -# CHECK: Try to schedule with 22 -# CHECK: Can't schedule -# CHECK: Try to schedule with 23 -# CHECK: Rejected the schedule because of too high register pressure -# CHECK: Try to schedule with 24 -# CHECK: Rejected the schedule because of too high register pressure -# CHECK: Try to schedule with 25 -# CHECK: Rejected the schedule because of too high register pressure -# CHECK: Try to schedule with 26 -# CHECK: Schedule Found? 1 (II=26) +# CHECK: {{^ *}}Try to schedule with {{[0-9]+$}} +# CHECK: {{^ *}}Rejected the schedule because of too high register pressure{{$}} +# CHECK: {{^ *}}Try to schedule with {{[0-9]+$}} +# CHECK: {{^ *}}Schedule Found? 
1 (II={{[0-9]+}}){{$}} --- | - ; ModuleID = 'a.ll' - source_filename = "a.c" target datalayout = "e-m:e-Fn32-i64:64-n32:64" target triple = "ppc64le" - ; Function Attrs: nofree nosync nounwind memory(argmem: read) uwtable - define dso_local double @kernel(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, i32 noundef signext %n) local_unnamed_addr #0 { + define dso_local double @kernel(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, i32 noundef signext %n) local_unnamed_addr { entry: - %0 = load double, ptr %a, align 8, !tbaa !3 - %arrayidx1 = getelementptr inbounds double, ptr %a, i64 1 - %1 = load double, ptr %arrayidx1, align 8, !tbaa !3 + %0 = load double, ptr %a, align 8 + %arrayidx1 = getelementptr inbounds i8, ptr %a, i64 8 + %1 = load double, ptr %arrayidx1, align 8 %cmp163 = icmp sgt i32 %n, 0 br i1 %cmp163, label %for.body.preheader, label %for.cond.cleanup for.body.preheader: ; preds = %entry - %wide.trip.count = zext i32 %n to i64 - %scevgep1 = getelementptr i8, ptr %b, i64 -8 + %wide.trip.count = zext nneg i32 %n to i64 + %scevgep167 = getelementptr i8, ptr %b, i64 -8 call void @llvm.set.loop.iterations.i64(i64 %wide.trip.count) br label %for.body @@ -43,11 +32,11 @@ %res.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %30, %for.body ] ret double %res.0.lcssa - for.body: ; preds = %for.body, %for.body.preheader + for.body: ; preds = %for.body.preheader, %for.body %res.0165 = phi double [ 0.000000e+00, %for.body.preheader ], [ %30, %for.body ] - %2 = phi ptr [ %scevgep1, %for.body.preheader ], [ %3, %for.body ] + %2 = phi ptr [ %scevgep167, %for.body.preheader ], [ %3, %for.body ] %3 = getelementptr i8, ptr %2, i64 8 - %4 = load double, ptr %3, align 8, !tbaa !3 + %4 = load double, ptr %3, align 8 %5 = tail call double @llvm.fmuladd.f64(double %0, double %4, double %0) %6 = tail call double @llvm.fmuladd.f64(double %5, double %4, double %5) %7 = tail call double @llvm.fmuladd.f64(double %6, double %4, double %6) 
@@ -92,152 +81,23 @@ %mul66 = fmul double %12, %mul65 %30 = tail call double @llvm.fmuladd.f64(double %mul66, double %10, double %res.0165) %31 = call i1 @llvm.loop.decrement.i64(i64 1) - br i1 %31, label %for.body, label %for.cond.cleanup, !llvm.loop !7 + br i1 %31, label %for.body, label %for.cond.cleanup } - ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) - declare double @llvm.fmuladd.f64(double, double, double) #1 + declare double @llvm.fmuladd.f64(double, double, double) - ; Function Attrs: nocallback noduplicate nofree nosync nounwind willreturn - declare void @llvm.set.loop.iterations.i64(i64) #2 + declare void @llvm.set.loop.iterations.i64(i64) - ; Function Attrs: nocallback noduplicate nofree nosync nounwind willreturn - declare i1 @llvm.loop.decrement.i64(i64) #2 + declare i1 @llvm.loop.decrement.i64(i64) - attributes #0 = { nofree nosync nounwind memory(argmem: read) uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crbits,+crypto,+direct-move,+extdiv,+htm,+isa-v206-instructions,+isa-v207-instructions,+isa-v30-instructions,+power8-vector,+power9-vector,+quadword-atomics,+vsx,-aix-small-local-exec-tls,-privileged,-rop-protect,-spe" } - attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } - attributes #2 = { nocallback noduplicate nofree nosync nounwind willreturn } - - !llvm.module.flags = !{!0, !1} - !llvm.ident = !{!2} - - !0 = !{i32 1, !"wchar_size", i32 4} - !1 = !{i32 7, !"uwtable", i32 2} - !2 = !{!"clang version 18.0.0 (https://miratech-soft@dev.azure.com/miratech-soft/llvm/_git/llvm c8d01fb665fc5d9378100a6d92ebcd3be49be655)"} - !3 = !{!4, !4, i64 0} - !4 = !{!"double", !5, i64 0} - !5 = !{!"omnipotent char", !6, i64 0} - !6 = !{!"Simple C/C++ TBAA"} - !7 = distinct !{!7, !8, !9} - !8 = !{!"llvm.loop.mustprogress"} - !9 = !{!"llvm.loop.unroll.disable"} - ... 
--- name: kernel -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false tracksRegLiveness: true -hasWinCFI: false -callsEHReturn: false -callsUnwindInit: false -hasEHCatchret: false -hasEHScopes: false -hasEHFunclets: false -isOutlined: false -debugInstrRef: false -failsVerification: false -tracksDebugUserValues: false -registers: - - { id: 0, class: vsfrc, preferred-register: '' } - - { id: 1, class: vsfrc, preferred-register: '' } - - { id: 2, class: g8rc, preferred-register: '' } - - { id: 3, class: vsfrc, preferred-register: '' } - - { id: 4, class: vsfrc, preferred-register: '' } - - { id: 5, class: g8rc_and_g8rc_nox0, preferred-register: '' } - - { id: 6, class: g8rc, preferred-register: '' } - - { id: 7, class: vsfrc, preferred-register: '' } - - { id: 8, class: g8rc_and_g8rc_nox0, preferred-register: '' } - - { id: 9, class: g8rc_and_g8rc_nox0, preferred-register: '' } - - { id: 10, class: g8rc, preferred-register: '' } - - { id: 11, class: gprc, preferred-register: '' } - - { id: 12, class: vsfrc, preferred-register: '' } - - { id: 13, class: crrc, preferred-register: '' } - - { id: 14, class: vsfrc, preferred-register: '' } - - { id: 15, class: g8rc, preferred-register: '' } - - { id: 16, class: g8rc, preferred-register: '' } - - { id: 17, class: g8rc, preferred-register: '' } - - { id: 18, class: f8rc, preferred-register: '' } - - { id: 19, class: g8rc_and_g8rc_nox0, preferred-register: '' } - - { id: 20, class: vsfrc, preferred-register: '' } - - { id: 21, class: vsfrc, preferred-register: '' } - - { id: 22, class: vsfrc, preferred-register: '' } - - { id: 23, class: vsfrc, preferred-register: '' } - - { id: 24, class: vsfrc, preferred-register: '' } - - { id: 25, class: vsfrc, preferred-register: '' } - - { id: 26, class: vsfrc, preferred-register: '' } - - { id: 27, class: vsfrc, preferred-register: '' } - - { id: 28, class: vsfrc, preferred-register: '' } - - { id: 29, class: vsfrc, 
preferred-register: '' } - - { id: 30, class: vsfrc, preferred-register: '' } - - { id: 31, class: vsfrc, preferred-register: '' } - - { id: 32, class: vsfrc, preferred-register: '' } - - { id: 33, class: vsfrc, preferred-register: '' } - - { id: 34, class: vsfrc, preferred-register: '' } - - { id: 35, class: vsfrc, preferred-register: '' } - - { id: 36, class: vsfrc, preferred-register: '' } - - { id: 37, class: vsfrc, preferred-register: '' } - - { id: 38, class: vsfrc, preferred-register: '' } - - { id: 39, class: vsfrc, preferred-register: '' } - - { id: 40, class: vsfrc, preferred-register: '' } - - { id: 41, class: vsfrc, preferred-register: '' } - - { id: 42, class: vsfrc, preferred-register: '' } - - { id: 43, class: vsfrc, preferred-register: '' } - - { id: 44, class: vsfrc, preferred-register: '' } - - { id: 45, class: vsfrc, preferred-register: '' } - - { id: 46, class: vsfrc, preferred-register: '' } - - { id: 47, class: vsfrc, preferred-register: '' } - - { id: 48, class: vsfrc, preferred-register: '' } - - { id: 49, class: vsfrc, preferred-register: '' } - - { id: 50, class: vsfrc, preferred-register: '' } - - { id: 51, class: vsfrc, preferred-register: '' } - - { id: 52, class: vsfrc, preferred-register: '' } - - { id: 53, class: vsfrc, preferred-register: '' } - - { id: 54, class: vsfrc, preferred-register: '' } - - { id: 55, class: vsfrc, preferred-register: '' } - - { id: 56, class: vsfrc, preferred-register: '' } - - { id: 57, class: vsfrc, preferred-register: '' } - - { id: 58, class: vsfrc, preferred-register: '' } - - { id: 59, class: vsfrc, preferred-register: '' } - - { id: 60, class: vsfrc, preferred-register: '' } - - { id: 61, class: vsfrc, preferred-register: '' } - - { id: 62, class: crbitrc, preferred-register: '' } liveins: - { reg: '$x3', virtual-reg: '%8' } - { reg: '$x4', virtual-reg: '%9' } - { reg: '$x5', virtual-reg: '%10' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - 
hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 1 - adjustsStack: false - hasCalls: false - stackProtector: '' - functionContext: '' - maxCallFrameSize: 4294967295 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - hasTailCall: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: [] -stack: [] -entry_values: [] -callSites: [] -debugValueSubstitutions: [] -constants: [] -machineFunctionInfo: {} body: | bb.0.entry: successors: %bb.2(0x50000000), %bb.1(0x30000000) @@ -251,16 +111,12 @@ body: | BCC 44, killed %13, %bb.2 bb.1: - successors: %bb.3(0x80000000) - %12:vsfrc = XXLXORdpz B %bb.3 bb.2.for.body.preheader: - successors: %bb.4(0x80000000) - - %0:vsfrc = DFLOADf64 0, %8 :: (load (s64) from %ir.a, !tbaa !3) - %1:vsfrc = DFLOADf64 8, killed %8 :: (load (s64) from %ir.arrayidx1, !tbaa !3) + %0:vsfrc = DFLOADf64 0, %8 :: (load (s64) from %ir.a) + %1:vsfrc = DFLOADf64 8, killed %8 :: (load (s64) from %ir.arrayidx1) %16:g8rc = IMPLICIT_DEF %15:g8rc = INSERT_SUBREG killed %16, killed %11, %subreg.sub_32 %17:g8rc = RLDICL killed %15, 0, 32 @@ -279,7 +135,7 @@ body: | %4:vsfrc = PHI %14, %bb.2, %7, %bb.4 %5:g8rc_and_g8rc_nox0 = PHI %2, %bb.2, %6, %bb.4 - %18:f8rc, %19:g8rc_and_g8rc_nox0 = LFDU 8, killed %5 :: (load (s64) from %ir.3, !tbaa !3) + %18:f8rc, %19:g8rc_and_g8rc_nox0 = LFDU 8, killed %5 :: (load (s64) from %ir.3) %6:g8rc = COPY killed %19 %20:vsfrc = nofpexcept XSMADDADP %0, %0, %18, implicit $rm %21:vsfrc = nofpexcept XSMADDADP %20, %20, %18, implicit $rm diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/anyext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/anyext.mir new file mode 100644 index 0000000..eda1180 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/anyext.mir @@ -0,0 +1,902 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc 
-mtriple=riscv32 -mattr=+v -run-pass=instruction-select -simplify-mir \ +# RUN: -verify-machineinstrs %s -o - | FileCheck -check-prefix=RV32I %s +# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=instruction-select -simplify-mir \ +# RUN: -verify-machineinstrs %s -o - | FileCheck -check-prefix=RV64I %s + +--- +name: anyext_nxv1i16_nxv1i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv1i16_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv1i16_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 1 x s8>) = COPY $v8 + %1:vrb(<vscale x 1 x s16>) = G_ANYEXT %0(<vscale x 1 x s8>) + $v8 = COPY %1(<vscale x 1 x s16>) + PseudoRET implicit $v8 + +... 
+--- +name: anyext_nxv1i32_nxv1i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv1i32_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv1i32_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 1 x s8>) = COPY $v8 + %1:vrb(<vscale x 1 x s32>) = G_ANYEXT %0(<vscale x 1 x s8>) + $v8 = COPY %1(<vscale x 1 x s32>) + PseudoRET implicit $v8 + +... 
+--- +name: anyext_nxv1i64_nxv1i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv1i64_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF8_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv1i64_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF8_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 1 x s8>) = COPY $v8 + %1:vrb(<vscale x 1 x s64>) = G_ANYEXT %0(<vscale x 1 x s8>) + $v8 = COPY %1(<vscale x 1 x s64>) + PseudoRET implicit $v8 + +... 
+--- +name: anyext_nxv2i16_nxv2i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv2i16_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv2i16_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 2 x s8>) = COPY $v8 + %1:vrb(<vscale x 2 x s16>) = G_ANYEXT %0(<vscale x 2 x s8>) + $v8 = COPY %1(<vscale x 2 x s16>) + PseudoRET implicit $v8 + +... 
+--- +name: anyext_nxv2i32_nxv2i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv2i32_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv2i32_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 2 x s8>) = COPY $v8 + %1:vrb(<vscale x 2 x s32>) = G_ANYEXT %0(<vscale x 2 x s8>) + $v8 = COPY %1(<vscale x 2 x s32>) + PseudoRET implicit $v8 + +... 
+--- +name: anyext_nxv2i64_nxv2i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv2i64_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF8_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: anyext_nxv2i64_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF8_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb(<vscale x 2 x s8>) = COPY $v8 + %1:vrb(<vscale x 2 x s64>) = G_ANYEXT %0(<vscale x 2 x s8>) + $v8m2 = COPY %1(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 + +... 
+--- +name: anyext_nxv4i16_nxv4i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv4i16_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv4i16_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 4 x s8>) = COPY $v8 + %1:vrb(<vscale x 4 x s16>) = G_ANYEXT %0(<vscale x 4 x s8>) + $v8 = COPY %1(<vscale x 4 x s16>) + PseudoRET implicit $v8 + +... 
+--- +name: anyext_nxv4i32_nxv4i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv4i32_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF4_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: anyext_nxv4i32_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF4_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb(<vscale x 4 x s8>) = COPY $v8 + %1:vrb(<vscale x 4 x s32>) = G_ANYEXT %0(<vscale x 4 x s8>) + $v8m2 = COPY %1(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 + +... 
+--- +name: anyext_nxv4i64_nxv4i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv4i64_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF8_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: anyext_nxv4i64_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF8_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb(<vscale x 4 x s8>) = COPY $v8 + %1:vrb(<vscale x 4 x s64>) = G_ANYEXT %0(<vscale x 4 x s8>) + $v8m4 = COPY %1(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 + +... 
+--- +name: anyext_nxv8i16_nxv8i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv8i16_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: anyext_nxv8i16_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb(<vscale x 8 x s8>) = COPY $v8 + %1:vrb(<vscale x 8 x s16>) = G_ANYEXT %0(<vscale x 8 x s8>) + $v8m2 = COPY %1(<vscale x 8 x s16>) + PseudoRET implicit $v8m2 + +... 
+--- +name: anyext_nxv8i32_nxv8i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv8i32_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF4_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: anyext_nxv8i32_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF4_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb(<vscale x 8 x s8>) = COPY $v8 + %1:vrb(<vscale x 8 x s32>) = G_ANYEXT %0(<vscale x 8 x s8>) + $v8m4 = COPY %1(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 + +... 
+--- +name: anyext_nxv8i64_nxv8i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv8i64_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF8_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: anyext_nxv8i64_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF8_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb(<vscale x 8 x s8>) = COPY $v8 + %1:vrb(<vscale x 8 x s64>) = G_ANYEXT %0(<vscale x 8 x s8>) + $v8m8 = COPY %1(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 + +... 
+--- +name: anyext_nxv16i16_nxv16i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv16i16_nxv16i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: anyext_nxv16i16_nxv16i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb(<vscale x 16 x s8>) = COPY $v8m2 + %1:vrb(<vscale x 16 x s16>) = G_ANYEXT %0(<vscale x 16 x s8>) + $v8m4 = COPY %1(<vscale x 16 x s16>) + PseudoRET implicit $v8m4 + +... 
+--- +name: anyext_nxv16i32_nxv16i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv16i32_nxv16i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m4 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF4_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: anyext_nxv16i32_nxv16i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m4 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF4_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb(<vscale x 16 x s8>) = COPY $v8m4 + %1:vrb(<vscale x 16 x s32>) = G_ANYEXT %0(<vscale x 16 x s8>) + $v8m8 = COPY %1(<vscale x 16 x s32>) + PseudoRET implicit $v8m8 + +... 
+--- +name: anyext_nxv32i16_nxv32i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv32i16_nxv32i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: anyext_nxv32i16_nxv32i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb(<vscale x 32 x s8>) = COPY $v8m4 + %1:vrb(<vscale x 32 x s16>) = G_ANYEXT %0(<vscale x 32 x s8>) + $v8m8 = COPY %1(<vscale x 32 x s16>) + PseudoRET implicit $v8m8 + +... 
+--- +name: anyext_nxv1i32_nxv1i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv1i32_nxv1i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv1i32_nxv1i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 1 x s16>) = COPY $v8 + %1:vrb(<vscale x 1 x s32>) = G_ANYEXT %0(<vscale x 1 x s16>) + $v8 = COPY %1(<vscale x 1 x s32>) + PseudoRET implicit $v8 + +... 
+--- +name: anyext_nxv1i64_nxv1i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv1i64_nxv1i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv1i64_nxv1i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 1 x s16>) = COPY $v8 + %1:vrb(<vscale x 1 x s64>) = G_ANYEXT %0(<vscale x 1 x s16>) + $v8 = COPY %1(<vscale x 1 x s64>) + PseudoRET implicit $v8 + +... 
+--- +name: anyext_nxv2i32_nxv2i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv2i32_nxv2i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv2i32_nxv2i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 2 x s16>) = COPY $v8 + %1:vrb(<vscale x 2 x s32>) = G_ANYEXT %0(<vscale x 2 x s16>) + $v8 = COPY %1(<vscale x 2 x s32>) + PseudoRET implicit $v8 + +... 
+--- +name: anyext_nxv2i64_nxv2i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv2i64_nxv2i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF4_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: anyext_nxv2i64_nxv2i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF4_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb(<vscale x 2 x s16>) = COPY $v8 + %1:vrb(<vscale x 2 x s64>) = G_ANYEXT %0(<vscale x 2 x s16>) + $v8m2 = COPY %1(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 + +... 
+--- +name: anyext_nxv4i32_nxv4i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv4i32_nxv4i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: anyext_nxv4i32_nxv4i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb(<vscale x 4 x s16>) = COPY $v8 + %1:vrb(<vscale x 4 x s32>) = G_ANYEXT %0(<vscale x 4 x s16>) + $v8m2 = COPY %1(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 + +... 
+--- +name: anyext_nxv4i64_nxv4i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv4i64_nxv4i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF4_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: anyext_nxv4i64_nxv4i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF4_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb(<vscale x 4 x s16>) = COPY $v8 + %1:vrb(<vscale x 4 x s64>) = G_ANYEXT %0(<vscale x 4 x s16>) + $v8m4 = COPY %1(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 + +... 
+--- +name: anyext_nxv8i32_nxv8i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv8i32_nxv8i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: anyext_nxv8i32_nxv8i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb(<vscale x 8 x s16>) = COPY $v8m2 + %1:vrb(<vscale x 8 x s32>) = G_ANYEXT %0(<vscale x 8 x s16>) + $v8m4 = COPY %1(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 + +... 
+--- +name: anyext_nxv8i64_nxv8i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv8i64_nxv8i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF4_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: anyext_nxv8i64_nxv8i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF4_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb(<vscale x 8 x s16>) = COPY $v8m2 + %1:vrb(<vscale x 8 x s64>) = G_ANYEXT %0(<vscale x 8 x s16>) + $v8m8 = COPY %1(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 + +... 
+--- +name: anyext_nxv16i32_nxv16i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv16i32_nxv16i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: anyext_nxv16i32_nxv16i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb(<vscale x 16 x s16>) = COPY $v8m4 + %1:vrb(<vscale x 16 x s32>) = G_ANYEXT %0(<vscale x 16 x s16>) + $v8m8 = COPY %1(<vscale x 16 x s32>) + PseudoRET implicit $v8m8 + +... 
+--- +name: anyext_nxv1i64_nxv1i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv1i64_nxv1i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv1i64_nxv1i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 1 x s32>) = COPY $v8 + %1:vrb(<vscale x 1 x s64>) = G_ANYEXT %0(<vscale x 1 x s32>) + $v8 = COPY %1(<vscale x 1 x s64>) + PseudoRET implicit $v8 + +... 
+--- +name: anyext_nxv2i64_nxv2i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv2i64_nxv2i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: anyext_nxv2i64_nxv2i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb(<vscale x 2 x s32>) = COPY $v8 + %1:vrb(<vscale x 2 x s64>) = G_ANYEXT %0(<vscale x 2 x s32>) + $v8m2 = COPY %1(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 + +... 
+--- +name: anyext_nxv4i64_nxv4i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv4i64_nxv4i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: anyext_nxv4i64_nxv4i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb(<vscale x 4 x s32>) = COPY $v8m2 + %1:vrb(<vscale x 4 x s64>) = G_ANYEXT %0(<vscale x 4 x s32>) + $v8m4 = COPY %1(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 + +... 
+--- +name: anyext_nxv8i64_nxv8i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv8i64_nxv8i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: anyext_nxv8i64_nxv8i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb(<vscale x 8 x s32>) = COPY $v8m4 + %1:vrb(<vscale x 8 x s64>) = G_ANYEXT %0(<vscale x 8 x s32>) + $v8m8 = COPY %1(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 + +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/icmp.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/icmp.mir new file mode 100644 index 0000000..df0d48a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/icmp.mir @@ -0,0 +1,534 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=instruction-select -simplify-mir -verify-machineinstrs %s -o - | FileCheck -check-prefix=RV32I %s +# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=instruction-select -simplify-mir -verify-machineinstrs %s -o - | FileCheck -check-prefix=RV64I %s + +# Don't test i1 element types here since they have been widened to i8 in legalization + +--- +name: icmp_nxv1i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv1i8 + ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: [[PseudoVMSLTU_VV_MF8_:%[0-9]+]]:vr = PseudoVMSLTU_VV_MF8 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_MF8_]] + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv1i8 + ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: [[PseudoVMSLTU_VV_MF8_:%[0-9]+]]:vr = PseudoVMSLTU_VV_MF8 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_MF8_]] + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 1 x s8>) = G_IMPLICIT_DEF + %1:vrb(<vscale x 1 x s1>) = G_ICMP intpred(ult), %0(<vscale x 1 x s8>), %0 + $v8 = COPY %1(<vscale x 1 x s1>) + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv2i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv2i8 + ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: [[PseudoVMSLT_VV_MF4_:%[0-9]+]]:vr = PseudoVMSLT_VV_MF4 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLT_VV_MF4_]] + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv2i8 + ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: [[PseudoVMSLT_VV_MF4_:%[0-9]+]]:vr = PseudoVMSLT_VV_MF4 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLT_VV_MF4_]] + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 2 x s8>) = G_IMPLICIT_DEF + %1:vrb(<vscale x 2 x s1>) = G_ICMP intpred(slt), %0(<vscale x 2 x s8>), %0 + $v8 = COPY %1(<vscale x 2 x s1>) + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv4i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv4i8 + ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: [[PseudoVMSLEU_VV_MF2_:%[0-9]+]]:vr = PseudoVMSLEU_VV_MF2 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLEU_VV_MF2_]] + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv4i8 + ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: [[PseudoVMSLEU_VV_MF2_:%[0-9]+]]:vr = PseudoVMSLEU_VV_MF2 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLEU_VV_MF2_]] + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 4 x s8>) = G_IMPLICIT_DEF + %1:vrb(<vscale x 4 x s1>) = G_ICMP intpred(uge), %0(<vscale x 4 x s8>), %0 + $v8 = COPY %1(<vscale x 4 x s1>) + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv8i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv8i8 + ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: [[PseudoVMSLE_VV_M1_:%[0-9]+]]:vr = PseudoVMSLE_VV_M1 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_M1_]] + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv8i8 + ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: [[PseudoVMSLE_VV_M1_:%[0-9]+]]:vr = PseudoVMSLE_VV_M1 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_M1_]] + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 8 x s8>) = G_IMPLICIT_DEF + %1:vrb(<vscale x 8 x s1>) = G_ICMP intpred(sge), %0(<vscale x 8 x s8>), %0 + $v8 = COPY %1(<vscale x 8 x s1>) + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv16i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv16i8 + ; RV32I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv16i8 + ; RV64I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 16 x s8>) = G_IMPLICIT_DEF + %1:vrb(<vscale x 16 x s1>) = G_ICMP intpred(ugt), %0(<vscale x 16 x s8>), %0 + $v8 = COPY %1(<vscale x 16 x s1>) + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv32i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv32i8 + ; RV32I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv32i8 + ; RV64I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 32 x s8>) = G_IMPLICIT_DEF + %1:vrb(<vscale x 32 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 32 x s8>), %0 + $v8 = COPY %1(<vscale x 32 x s1>) + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv64i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv64i8 + ; RV32I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv64i8 + ; RV64I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 64 x s8>) = G_IMPLICIT_DEF + %1:vrb(<vscale x 64 x s1>) = G_ICMP intpred(ule), %0(<vscale x 64 x s8>), %0 + $v8 = COPY %1(<vscale x 64 x s1>) + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv1i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv1i16 + ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: [[PseudoVMSLE_VV_MF4_:%[0-9]+]]:vr = PseudoVMSLE_VV_MF4 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_MF4_]] + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv1i16 + ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: [[PseudoVMSLE_VV_MF4_:%[0-9]+]]:vr = PseudoVMSLE_VV_MF4 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_MF4_]] + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 1 x s16>) = G_IMPLICIT_DEF + %1:vrb(<vscale x 1 x s1>) = G_ICMP intpred(sle), %0(<vscale x 1 x s16>), %0 + $v8 = COPY %1(<vscale x 1 x s1>) + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv2i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv2i16 + ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: [[PseudoVMSNE_VV_MF2_:%[0-9]+]]:vr = PseudoVMSNE_VV_MF2 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSNE_VV_MF2_]] + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv2i16 + ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: [[PseudoVMSNE_VV_MF2_:%[0-9]+]]:vr = PseudoVMSNE_VV_MF2 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSNE_VV_MF2_]] + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 2 x s16>) = G_IMPLICIT_DEF + %1:vrb(<vscale x 2 x s1>) = G_ICMP intpred(ne), %0(<vscale x 2 x s16>), %0 + $v8 = COPY %1(<vscale x 2 x s1>) + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv4i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv4i16 + ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vr = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSEQ_VV_M1_]] + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv4i16 + ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vr = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSEQ_VV_M1_]] + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 4 x s16>) = G_IMPLICIT_DEF + %1:vrb(<vscale x 4 x s1>) = G_ICMP intpred(eq), %0(<vscale x 4 x s16>), %0 + $v8 = COPY %1(<vscale x 4 x s1>) + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv8i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv8i16 + ; RV32I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv8i16 + ; RV64I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 8 x s16>) = G_IMPLICIT_DEF + %1:vrb(<vscale x 8 x s1>) = G_ICMP intpred(ult), %0(<vscale x 8 x s16>), %0 + $v8 = COPY %1(<vscale x 8 x s1>) + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv16i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv16i16 + ; RV32I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv16i16 + ; RV64I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 16 x s16>) = G_IMPLICIT_DEF + %1:vrb(<vscale x 16 x s1>) = G_ICMP intpred(slt), %0(<vscale x 16 x s16>), %0 + $v8 = COPY %1(<vscale x 16 x s1>) + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv32i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv32i16 + ; RV32I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv32i16 + ; RV64I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 32 x s16>) = G_IMPLICIT_DEF + %1:vrb(<vscale x 32 x s1>) = G_ICMP intpred(uge), %0(<vscale x 32 x s16>), %0 + $v8 = COPY %1(<vscale x 32 x s1>) + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv1i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv1i32 + ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: [[PseudoVMSLE_VV_MF2_:%[0-9]+]]:vr = PseudoVMSLE_VV_MF2 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_MF2_]] + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv1i32 + ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: [[PseudoVMSLE_VV_MF2_:%[0-9]+]]:vr = PseudoVMSLE_VV_MF2 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_MF2_]] + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 1 x s32>) = G_IMPLICIT_DEF + %1:vrb(<vscale x 1 x s1>) = G_ICMP intpred(sge), %0(<vscale x 1 x s32>), %0 + $v8 = COPY %1(<vscale x 1 x s1>) + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv2i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv2i32 + ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: [[PseudoVMSLTU_VV_M1_:%[0-9]+]]:vr = PseudoVMSLTU_VV_M1 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_M1_]] + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv2i32 + ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: [[PseudoVMSLTU_VV_M1_:%[0-9]+]]:vr = PseudoVMSLTU_VV_M1 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_M1_]] + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 2 x s32>) = G_IMPLICIT_DEF + %1:vrb(<vscale x 2 x s1>) = G_ICMP intpred(ugt), %0(<vscale x 2 x s32>), %0 + $v8 = COPY %1(<vscale x 2 x s1>) + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv4i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv4i32 + ; RV32I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M2 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv4i32 + ; RV64I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M2 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 4 x s32>) = G_IMPLICIT_DEF + %1:vrb(<vscale x 4 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 4 x s32>), %0 + $v8 = COPY %1(<vscale x 4 x s1>) + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv8i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv8i32 + ; RV32I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M4 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv8i32 + ; RV64I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M4 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 8 x s32>) = G_IMPLICIT_DEF + %1:vrb(<vscale x 8 x s1>) = G_ICMP intpred(ule), %0(<vscale x 8 x s32>), %0 + $v8 = COPY %1(<vscale x 8 x s1>) + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv16i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv16i32 + ; RV32I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLE_VV_M8 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv16i32 + ; RV64I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLE_VV_M8 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 16 x s32>) = G_IMPLICIT_DEF + %1:vrb(<vscale x 16 x s1>) = G_ICMP intpred(sle), %0(<vscale x 16 x s32>), %0 + $v8 = COPY %1(<vscale x 16 x s1>) + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv1i64 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv1i64 + ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vr = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 6 /* e64 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSEQ_VV_M1_]] + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv1i64 + ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vr = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 6 /* e64 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSEQ_VV_M1_]] + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 1 x s64>) = G_IMPLICIT_DEF + %1:vrb(<vscale x 1 x s1>) = G_ICMP intpred(eq), %0(<vscale x 1 x s64>), %0 + $v8 = COPY %1(<vscale x 1 x s1>) + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv2i64 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv2i64 + ; RV32I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSNE_VV_M2 [[DEF]], [[DEF]], -1, 6 /* e64 */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv2i64 + ; RV64I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSNE_VV_M2 [[DEF]], [[DEF]], -1, 6 /* e64 */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 2 x s64>) = G_IMPLICIT_DEF + %1:vrb(<vscale x 2 x s1>) = G_ICMP intpred(ne), %0(<vscale x 2 x s64>), %0 + $v8 = COPY %1(<vscale x 2 x s1>) + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv4i64 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv4i64 + ; RV32I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M4 [[DEF]], [[DEF]], -1, 6 /* e64 */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv4i64 + ; RV64I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M4 [[DEF]], [[DEF]], -1, 6 /* e64 */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 4 x s64>) = G_IMPLICIT_DEF + %1:vrb(<vscale x 4 x s1>) = G_ICMP intpred(ult), %0(<vscale x 4 x s64>), %0 + $v8 = COPY %1(<vscale x 4 x s1>) + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv8i64 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv8i64 + ; RV32I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M8 [[DEF]], [[DEF]], -1, 6 /* e64 */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv8i64 + ; RV64I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M8 [[DEF]], [[DEF]], -1, 6 /* e64 */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 8 x s64>) = G_IMPLICIT_DEF + %1:vrb(<vscale x 8 x s1>) = G_ICMP intpred(ult), %0(<vscale x 8 x s64>), %0 + $v8 = COPY %1(<vscale x 8 x s1>) + PseudoRET implicit $v8 + +... diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/sext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/sext.mir new file mode 100644 index 0000000..382166f --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/sext.mir @@ -0,0 +1,900 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=instruction-select -simplify-mir -verify-machineinstrs %s -o - | FileCheck -check-prefix=RV32I %s +# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=instruction-select -simplify-mir -verify-machineinstrs %s -o - | FileCheck -check-prefix=RV64I %s + +--- +name: sext_nxv1i16_nxv1i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv1i16_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_MF4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: 
sext_nxv1i16_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_MF4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 1 x s8>) = COPY $v8 + %1:vrb(<vscale x 1 x s16>) = G_SEXT %0(<vscale x 1 x s8>) + $v8 = COPY %1(<vscale x 1 x s16>) + PseudoRET implicit $v8 + +... +--- +name: sext_nxv1i32_nxv1i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv1i32_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF4_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv1i32_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF4_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 1 x s8>) = COPY $v8 + %1:vrb(<vscale x 1 x s32>) = G_SEXT %0(<vscale x 1 x s8>) + $v8 = COPY %1(<vscale x 1 x s32>) + PseudoRET implicit $v8 + +... 
+--- +name: sext_nxv1i64_nxv1i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv1i64_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF8_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv1i64_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF8_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 1 x s8>) = COPY $v8 + %1:vrb(<vscale x 1 x s64>) = G_SEXT %0(<vscale x 1 x s8>) + $v8 = COPY %1(<vscale x 1 x s64>) + PseudoRET implicit $v8 + +... 
+--- +name: sext_nxv2i16_nxv2i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv2i16_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv2i16_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 2 x s8>) = COPY $v8 + %1:vrb(<vscale x 2 x s16>) = G_SEXT %0(<vscale x 2 x s8>) + $v8 = COPY %1(<vscale x 2 x s16>) + PseudoRET implicit $v8 + +... 
+--- +name: sext_nxv2i32_nxv2i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv2i32_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF4_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv2i32_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF4_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 2 x s8>) = COPY $v8 + %1:vrb(<vscale x 2 x s32>) = G_SEXT %0(<vscale x 2 x s8>) + $v8 = COPY %1(<vscale x 2 x s32>) + PseudoRET implicit $v8 + +... 
+--- +name: sext_nxv2i64_nxv2i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv2i64_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF8_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: sext_nxv2i64_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF8_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb(<vscale x 2 x s8>) = COPY $v8 + %1:vrb(<vscale x 2 x s64>) = G_SEXT %0(<vscale x 2 x s8>) + $v8m2 = COPY %1(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 + +... 
+--- +name: sext_nxv4i16_nxv4i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv4i16_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_M1 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv4i16_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_M1 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 4 x s8>) = COPY $v8 + %1:vrb(<vscale x 4 x s16>) = G_SEXT %0(<vscale x 4 x s8>) + $v8 = COPY %1(<vscale x 4 x s16>) + PseudoRET implicit $v8 + +... 
+--- +name: sext_nxv4i32_nxv4i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv4i32_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF4_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: sext_nxv4i32_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF4_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb(<vscale x 4 x s8>) = COPY $v8 + %1:vrb(<vscale x 4 x s32>) = G_SEXT %0(<vscale x 4 x s8>) + $v8m2 = COPY %1(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 + +... 
+--- +name: sext_nxv4i64_nxv4i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv4i64_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF8_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: sext_nxv4i64_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF8_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb(<vscale x 4 x s8>) = COPY $v8 + %1:vrb(<vscale x 4 x s64>) = G_SEXT %0(<vscale x 4 x s8>) + $v8m4 = COPY %1(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 + +... 
+--- +name: sext_nxv8i16_nxv8i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv8i16_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF2_M2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: sext_nxv8i16_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF2_M2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb(<vscale x 8 x s8>) = COPY $v8 + %1:vrb(<vscale x 8 x s16>) = G_SEXT %0(<vscale x 8 x s8>) + $v8m2 = COPY %1(<vscale x 8 x s16>) + PseudoRET implicit $v8m2 + +... 
+--- +name: sext_nxv8i32_nxv8i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv8i32_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF4_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: sext_nxv8i32_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF4_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb(<vscale x 8 x s8>) = COPY $v8 + %1:vrb(<vscale x 8 x s32>) = G_SEXT %0(<vscale x 8 x s8>) + $v8m4 = COPY %1(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 + +... 
+--- +name: sext_nxv8i64_nxv8i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv8i64_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF8_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: sext_nxv8i64_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF8_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb(<vscale x 8 x s8>) = COPY $v8 + %1:vrb(<vscale x 8 x s64>) = G_SEXT %0(<vscale x 8 x s8>) + $v8m8 = COPY %1(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 + +... 
+--- +name: sext_nxv16i16_nxv16i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv16i16_nxv16i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF2_M4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: sext_nxv16i16_nxv16i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF2_M4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb(<vscale x 16 x s8>) = COPY $v8m2 + %1:vrb(<vscale x 16 x s16>) = G_SEXT %0(<vscale x 16 x s8>) + $v8m4 = COPY %1(<vscale x 16 x s16>) + PseudoRET implicit $v8m4 + +... 
+--- +name: sext_nxv16i32_nxv16i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv16i32_nxv16i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF4_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: sext_nxv16i32_nxv16i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF4_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb(<vscale x 16 x s8>) = COPY $v8m2 + %1:vrb(<vscale x 16 x s32>) = G_SEXT %0(<vscale x 16 x s8>) + $v8m8 = COPY %1(<vscale x 16 x s32>) + PseudoRET implicit $v8m8 + +... 
+--- +name: sext_nxv32i16_nxv32i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv32i16_nxv32i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF2_M8 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: sext_nxv32i16_nxv32i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF2_M8 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb(<vscale x 32 x s8>) = COPY $v8m4 + %1:vrb(<vscale x 32 x s16>) = G_SEXT %0(<vscale x 32 x s8>) + $v8m8 = COPY %1(<vscale x 32 x s16>) + PseudoRET implicit $v8m8 + +... 
+--- +name: sext_nxv1i32_nxv1i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv1i32_nxv1i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv1i32_nxv1i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 1 x s16>) = COPY $v8 + %1:vrb(<vscale x 1 x s32>) = G_SEXT %0(<vscale x 1 x s16>) + $v8 = COPY %1(<vscale x 1 x s32>) + PseudoRET implicit $v8 + +... 
+--- +name: sext_nxv1i64_nxv1i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv1i64_nxv1i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF4_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv1i64_nxv1i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF4_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 1 x s16>) = COPY $v8 + %1:vrb(<vscale x 1 x s64>) = G_SEXT %0(<vscale x 1 x s16>) + $v8 = COPY %1(<vscale x 1 x s64>) + PseudoRET implicit $v8 + +... 
+--- +name: sext_nxv2i32_nxv2i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv2i32_nxv2i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv2i32_nxv2i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 2 x s16>) = COPY $v8 + %1:vrb(<vscale x 2 x s32>) = G_SEXT %0(<vscale x 2 x s16>) + $v8 = COPY %1(<vscale x 2 x s32>) + PseudoRET implicit $v8 + +... 
+--- +name: sext_nxv2i64_nxv2i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv2i64_nxv2i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF4_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: sext_nxv2i64_nxv2i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF4_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb(<vscale x 2 x s16>) = COPY $v8 + %1:vrb(<vscale x 2 x s64>) = G_SEXT %0(<vscale x 2 x s16>) + $v8m2 = COPY %1(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 + +... 
+--- +name: sext_nxv4i32_nxv4i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv4i32_nxv4i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF2_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: sext_nxv4i32_nxv4i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF2_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb(<vscale x 4 x s16>) = COPY $v8 + %1:vrb(<vscale x 4 x s32>) = G_SEXT %0(<vscale x 4 x s16>) + $v8m2 = COPY %1(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 + +... 
+--- +name: sext_nxv4i64_nxv4i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv4i64_nxv4i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF4_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: sext_nxv4i64_nxv4i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF4_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb(<vscale x 4 x s16>) = COPY $v8 + %1:vrb(<vscale x 4 x s64>) = G_SEXT %0(<vscale x 4 x s16>) + $v8m4 = COPY %1(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 + +... 
+--- +name: sext_nxv8i32_nxv8i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv8i32_nxv8i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF2_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: sext_nxv8i32_nxv8i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF2_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb(<vscale x 8 x s16>) = COPY $v8m2 + %1:vrb(<vscale x 8 x s32>) = G_SEXT %0(<vscale x 8 x s16>) + $v8m4 = COPY %1(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 + +... 
+--- +name: sext_nxv8i64_nxv8i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv8i64_nxv8i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF4_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: sext_nxv8i64_nxv8i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF4_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb(<vscale x 8 x s16>) = COPY $v8m2 + %1:vrb(<vscale x 8 x s64>) = G_SEXT %0(<vscale x 8 x s16>) + $v8m8 = COPY %1(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 + +... 
+--- +name: sext_nxv16i32_nxv16i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv16i32_nxv16i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF2_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: sext_nxv16i32_nxv16i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF2_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb(<vscale x 16 x s16>) = COPY $v8m4 + %1:vrb(<vscale x 16 x s32>) = G_SEXT %0(<vscale x 16 x s16>) + $v8m8 = COPY %1(<vscale x 16 x s32>) + PseudoRET implicit $v8m8 + +... 
+--- +name: sext_nxv1i64_nxv1i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv1i64_nxv1i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv1i64_nxv1i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 1 x s32>) = COPY $v8 + %1:vrb(<vscale x 1 x s64>) = G_SEXT %0(<vscale x 1 x s32>) + $v8 = COPY %1(<vscale x 1 x s64>) + PseudoRET implicit $v8 + +... 
+--- +name: sext_nxv2i64_nxv2i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv2i64_nxv2i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF2_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: sext_nxv2i64_nxv2i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF2_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb(<vscale x 2 x s32>) = COPY $v8 + %1:vrb(<vscale x 2 x s64>) = G_SEXT %0(<vscale x 2 x s32>) + $v8m2 = COPY %1(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 + +... 
+--- +name: sext_nxv4i64_nxv4i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv4i64_nxv4i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF2_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: sext_nxv4i64_nxv4i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF2_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb(<vscale x 4 x s32>) = COPY $v8m2 + %1:vrb(<vscale x 4 x s64>) = G_SEXT %0(<vscale x 4 x s32>) + $v8m4 = COPY %1(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 + +... 
+--- +name: sext_nxv8i64_nxv8i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv8i64_nxv8i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF2_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: sext_nxv8i64_nxv8i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF2_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb(<vscale x 8 x s32>) = COPY $v8m4 + %1:vrb(<vscale x 8 x s64>) = G_SEXT %0(<vscale x 8 x s32>) + $v8m8 = COPY %1(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 + +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/zext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/zext.mir new file mode 100644 index 0000000..2fc9e05 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/zext.mir @@ -0,0 +1,900 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=instruction-select -simplify-mir -verify-machineinstrs %s -o - | FileCheck -check-prefix=RV32I %s +# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=instruction-select -simplify-mir -verify-machineinstrs %s -o - | FileCheck -check-prefix=RV64I %s + +--- +name: zext_nxv1i16_nxv1i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv1i16_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv1i16_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 1 x s8>) = COPY $v8 + %1:vrb(<vscale x 1 x s16>) = G_ZEXT %0(<vscale x 1 x s8>) + $v8 = COPY %1(<vscale x 1 x s16>) + PseudoRET implicit $v8 + +... 
+--- +name: zext_nxv1i32_nxv1i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv1i32_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv1i32_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 1 x s8>) = COPY $v8 + %1:vrb(<vscale x 1 x s32>) = G_ZEXT %0(<vscale x 1 x s8>) + $v8 = COPY %1(<vscale x 1 x s32>) + PseudoRET implicit $v8 + +... 
+--- +name: zext_nxv1i64_nxv1i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv1i64_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF8_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv1i64_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF8_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 1 x s8>) = COPY $v8 + %1:vrb(<vscale x 1 x s64>) = G_ZEXT %0(<vscale x 1 x s8>) + $v8 = COPY %1(<vscale x 1 x s64>) + PseudoRET implicit $v8 + +... 
+--- +name: zext_nxv2i16_nxv2i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv2i16_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv2i16_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 2 x s8>) = COPY $v8 + %1:vrb(<vscale x 2 x s16>) = G_ZEXT %0(<vscale x 2 x s8>) + $v8 = COPY %1(<vscale x 2 x s16>) + PseudoRET implicit $v8 + +... 
+--- +name: zext_nxv2i32_nxv2i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv2i32_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv2i32_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 2 x s8>) = COPY $v8 + %1:vrb(<vscale x 2 x s32>) = G_ZEXT %0(<vscale x 2 x s8>) + $v8 = COPY %1(<vscale x 2 x s32>) + PseudoRET implicit $v8 + +... 
+--- +name: zext_nxv2i64_nxv2i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv2i64_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF8_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: zext_nxv2i64_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF8_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb(<vscale x 2 x s8>) = COPY $v8 + %1:vrb(<vscale x 2 x s64>) = G_ZEXT %0(<vscale x 2 x s8>) + $v8m2 = COPY %1(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 + +... 
+--- +name: zext_nxv4i16_nxv4i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv4i16_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv4i16_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 4 x s8>) = COPY $v8 + %1:vrb(<vscale x 4 x s16>) = G_ZEXT %0(<vscale x 4 x s8>) + $v8 = COPY %1(<vscale x 4 x s16>) + PseudoRET implicit $v8 + +... 
+--- +name: zext_nxv4i32_nxv4i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv4i32_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF4_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: zext_nxv4i32_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF4_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb(<vscale x 4 x s8>) = COPY $v8 + %1:vrb(<vscale x 4 x s32>) = G_ZEXT %0(<vscale x 4 x s8>) + $v8m2 = COPY %1(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 + +... 
+--- +name: zext_nxv4i64_nxv4i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv4i64_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF8_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: zext_nxv4i64_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF8_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb(<vscale x 4 x s8>) = COPY $v8 + %1:vrb(<vscale x 4 x s64>) = G_ZEXT %0(<vscale x 4 x s8>) + $v8m4 = COPY %1(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 + +... 
+--- +name: zext_nxv8i16_nxv8i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv8i16_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: zext_nxv8i16_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb(<vscale x 8 x s8>) = COPY $v8 + %1:vrb(<vscale x 8 x s16>) = G_ZEXT %0(<vscale x 8 x s8>) + $v8m2 = COPY %1(<vscale x 8 x s16>) + PseudoRET implicit $v8m2 + +... 
+--- +name: zext_nxv8i32_nxv8i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv8i32_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF4_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: zext_nxv8i32_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF4_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb(<vscale x 8 x s8>) = COPY $v8 + %1:vrb(<vscale x 8 x s32>) = G_ZEXT %0(<vscale x 8 x s8>) + $v8m4 = COPY %1(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 + +... 
+--- +name: zext_nxv8i64_nxv8i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv8i64_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF8_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: zext_nxv8i64_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF8_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb(<vscale x 8 x s8>) = COPY $v8 + %1:vrb(<vscale x 8 x s64>) = G_ZEXT %0(<vscale x 8 x s8>) + $v8m8 = COPY %1(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 + +... 
+--- +name: zext_nxv16i16_nxv16i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv16i16_nxv16i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: zext_nxv16i16_nxv16i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb(<vscale x 16 x s8>) = COPY $v8m2 + %1:vrb(<vscale x 16 x s16>) = G_ZEXT %0(<vscale x 16 x s8>) + $v8m4 = COPY %1(<vscale x 16 x s16>) + PseudoRET implicit $v8m4 + +... 
+--- +name: zext_nxv16i32_nxv16i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv16i32_nxv16i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF4_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: zext_nxv16i32_nxv16i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF4_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb(<vscale x 16 x s8>) = COPY $v8m2 + %1:vrb(<vscale x 16 x s32>) = G_ZEXT %0(<vscale x 16 x s8>) + $v8m8 = COPY %1(<vscale x 16 x s32>) + PseudoRET implicit $v8m8 + +... 
+--- +name: zext_nxv32i16_nxv32i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv32i16_nxv32i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: zext_nxv32i16_nxv32i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb(<vscale x 32 x s8>) = COPY $v8m4 + %1:vrb(<vscale x 32 x s16>) = G_ZEXT %0(<vscale x 32 x s8>) + $v8m8 = COPY %1(<vscale x 32 x s16>) + PseudoRET implicit $v8m8 + +... 
+--- +name: zext_nxv1i32_nxv1i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv1i32_nxv1i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv1i32_nxv1i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 1 x s16>) = COPY $v8 + %1:vrb(<vscale x 1 x s32>) = G_ZEXT %0(<vscale x 1 x s16>) + $v8 = COPY %1(<vscale x 1 x s32>) + PseudoRET implicit $v8 + +... 
+--- +name: zext_nxv1i64_nxv1i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv1i64_nxv1i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv1i64_nxv1i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 1 x s16>) = COPY $v8 + %1:vrb(<vscale x 1 x s64>) = G_ZEXT %0(<vscale x 1 x s16>) + $v8 = COPY %1(<vscale x 1 x s64>) + PseudoRET implicit $v8 + +... 
+--- +name: zext_nxv2i32_nxv2i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv2i32_nxv2i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv2i32_nxv2i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 2 x s16>) = COPY $v8 + %1:vrb(<vscale x 2 x s32>) = G_ZEXT %0(<vscale x 2 x s16>) + $v8 = COPY %1(<vscale x 2 x s32>) + PseudoRET implicit $v8 + +... 
+--- +name: zext_nxv2i64_nxv2i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv2i64_nxv2i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF4_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: zext_nxv2i64_nxv2i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF4_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb(<vscale x 2 x s16>) = COPY $v8 + %1:vrb(<vscale x 2 x s64>) = G_ZEXT %0(<vscale x 2 x s16>) + $v8m2 = COPY %1(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 + +... 
+--- +name: zext_nxv4i32_nxv4i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv4i32_nxv4i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: zext_nxv4i32_nxv4i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb(<vscale x 4 x s16>) = COPY $v8 + %1:vrb(<vscale x 4 x s32>) = G_ZEXT %0(<vscale x 4 x s16>) + $v8m2 = COPY %1(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 + +... 
+--- +name: zext_nxv4i64_nxv4i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv4i64_nxv4i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF4_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: zext_nxv4i64_nxv4i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF4_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb(<vscale x 4 x s16>) = COPY $v8 + %1:vrb(<vscale x 4 x s64>) = G_ZEXT %0(<vscale x 4 x s16>) + $v8m4 = COPY %1(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 + +... 
+--- +name: zext_nxv8i32_nxv8i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv8i32_nxv8i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: zext_nxv8i32_nxv8i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb(<vscale x 8 x s16>) = COPY $v8m2 + %1:vrb(<vscale x 8 x s32>) = G_ZEXT %0(<vscale x 8 x s16>) + $v8m4 = COPY %1(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 + +... 
+--- +name: zext_nxv8i64_nxv8i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv8i64_nxv8i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m4 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF4_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: zext_nxv8i64_nxv8i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m4 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF4_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb(<vscale x 8 x s16>) = COPY $v8m4 + %1:vrb(<vscale x 8 x s64>) = G_ZEXT %0(<vscale x 8 x s16>) + $v8m8 = COPY %1(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 + +... 
+--- +name: zext_nxv16i32_nxv16i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv16i32_nxv16i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: zext_nxv16i32_nxv16i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb(<vscale x 16 x s16>) = COPY $v8m4 + %1:vrb(<vscale x 16 x s32>) = G_ZEXT %0(<vscale x 16 x s16>) + $v8m8 = COPY %1(<vscale x 16 x s32>) + PseudoRET implicit $v8m8 + +... 
+--- +name: zext_nxv1i64_nxv1i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv1i64_nxv1i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv1i64_nxv1i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb(<vscale x 1 x s32>) = COPY $v8 + %1:vrb(<vscale x 1 x s64>) = G_ZEXT %0(<vscale x 1 x s32>) + $v8 = COPY %1(<vscale x 1 x s64>) + PseudoRET implicit $v8 + +... 
+--- +name: zext_nxv2i64_nxv2i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv2i64_nxv2i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: zext_nxv2i64_nxv2i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb(<vscale x 2 x s32>) = COPY $v8 + %1:vrb(<vscale x 2 x s64>) = G_ZEXT %0(<vscale x 2 x s32>) + $v8m2 = COPY %1(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 + +... 
+--- +name: zext_nxv4i64_nxv4i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv4i64_nxv4i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: zext_nxv4i64_nxv4i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb(<vscale x 4 x s32>) = COPY $v8m2 + %1:vrb(<vscale x 4 x s64>) = G_ZEXT %0(<vscale x 4 x s32>) + $v8m4 = COPY %1(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 + +... 
+--- +name: zext_nxv8i64_nxv8i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv8i64_nxv8i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: zext_nxv8i64_nxv8i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb(<vscale x 8 x s32>) = COPY $v8m4 + %1:vrb(<vscale x 8 x s64>) = G_ZEXT %0(<vscale x 8 x s32>) + $v8m8 = COPY %1(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 + +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-anyext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-anyext.mir new file mode 100644 index 0000000..3a2d40f --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-anyext.mir @@ -0,0 +1,1589 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck --check-prefix=RV32 %s +# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer %s -o - | FileCheck --check-prefix=RV64 %s + +# Extend from s1 element vectors +--- +name: anyext_nxv1i8_nxv1i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv1i8_nxv1i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s8>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv1i8_nxv1i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; 
RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s8>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s1>) = COPY $v0 + %0:_(<vscale x 1 x s8>) = G_ANYEXT %1(<vscale x 1 x s1>) + $v8 = COPY %0(<vscale x 1 x s8>) + PseudoRET implicit $v8 +... +--- +name: anyext_nxv1i16_nxv1i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv1i16_nxv1i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv1i16_nxv1i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale 
x 1 x s1>) = COPY $v0 + %0:_(<vscale x 1 x s16>) = G_ANYEXT %1(<vscale x 1 x s1>) + $v8 = COPY %0(<vscale x 1 x s16>) + PseudoRET implicit $v8 +... +--- +name: anyext_nxv1i32_nxv1i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv1i32_nxv1i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv1i32_nxv1i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s1>) = COPY $v0 + %0:_(<vscale x 1 x s32>) = G_ANYEXT %1(<vscale x 1 x s1>) + $v8 = COPY %0(<vscale x 1 x s32>) + PseudoRET implicit $v8 +... 
+--- +name: anyext_nxv1i64_nxv1i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv1i64_nxv1i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[MV]](s64) + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[MV1]](s64) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv1i64_nxv1i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[C]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[C1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s1>) = COPY $v0 + %0:_(<vscale x 1 x s64>) = G_ANYEXT %1(<vscale x 1 x s1>) + $v8 = COPY %0(<vscale x 1 x s64>) + PseudoRET implicit $v8 +... 
+--- +name: anyext_nxv2i8_nxv2i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv2i8_nxv2i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s8>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv2i8_nxv2i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s8>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s1>) = COPY $v0 + %0:_(<vscale x 2 x s8>) = G_ANYEXT %1(<vscale x 2 x s1>) + $v8 = COPY %0(<vscale x 2 x s8>) + PseudoRET implicit $v8 +... 
+--- +name: anyext_nxv2i16_nxv2i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv2i16_nxv2i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv2i16_nxv2i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s1>) = COPY $v0 + %0:_(<vscale x 2 x s16>) = G_ANYEXT %1(<vscale x 2 x s1>) + $v8 = COPY %0(<vscale x 2 x s16>) + PseudoRET implicit $v8 +... 
+--- +name: anyext_nxv2i32_nxv2i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv2i32_nxv2i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv2i32_nxv2i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s1>) = COPY $v0 + %0:_(<vscale x 2 x s32>) = G_ANYEXT %1(<vscale x 2 x s1>) + $v8 = COPY %0(<vscale x 2 x s32>) + PseudoRET implicit $v8 +... 
+--- +name: anyext_nxv2i64_nxv2i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv2i64_nxv2i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[MV]](s64) + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[MV1]](s64) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 2 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: anyext_nxv2i64_nxv2i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[C]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[C1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 2 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 2 x s1>) = COPY $v0 + %0:_(<vscale x 2 x s64>) = G_ANYEXT %1(<vscale x 2 x s1>) + $v8m2 = COPY %0(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 +... 
+--- +name: anyext_nxv4i8_nxv4i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv4i8_nxv4i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 4 x s8>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv4i8_nxv4i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 4 x s8>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 4 x s1>) = COPY $v0 + %0:_(<vscale x 4 x s8>) = G_ANYEXT %1(<vscale x 4 x s1>) + $v8 = COPY %0(<vscale x 4 x s8>) + PseudoRET implicit $v8 +... 
+--- +name: anyext_nxv4i16_nxv4i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv4i16_nxv4i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 4 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv4i16_nxv4i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 4 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 4 x s1>) = COPY $v0 + %0:_(<vscale x 4 x s16>) = G_ANYEXT %1(<vscale x 4 x s1>) + $v8 = COPY %0(<vscale x 4 x s16>) + PseudoRET implicit $v8 +... 
+--- +name: anyext_nxv4i32_nxv4i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv4i32_nxv4i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 4 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: anyext_nxv4i32_nxv4i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 4 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 4 x s1>) = COPY $v0 + %0:_(<vscale x 4 x s32>) = G_ANYEXT %1(<vscale x 4 x s1>) + $v8m2 = COPY %0(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 +... 
+--- +name: anyext_nxv4i64_nxv4i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv4i64_nxv4i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[MV]](s64) + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[MV1]](s64) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 4 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: anyext_nxv4i64_nxv4i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[C]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[C1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 4 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 4 x s1>) = COPY $v0 + %0:_(<vscale x 4 x s64>) = G_ANYEXT %1(<vscale x 4 x s1>) + $v8m4 = COPY %0(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 +... 
+--- +name: anyext_nxv8i8_nxv8i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv8i8_nxv8i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 8 x s8>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv8i8_nxv8i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 8 x s8>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 8 x s1>) = COPY $v0 + %0:_(<vscale x 8 x s8>) = G_ANYEXT %1(<vscale x 8 x s1>) + $v8 = COPY %0(<vscale x 8 x s8>) + PseudoRET implicit $v8 +... 
+--- +name: anyext_nxv8i16_nxv8i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv8i16_nxv8i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 8 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: anyext_nxv8i16_nxv8i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 8 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 8 x s1>) = COPY $v0 + %0:_(<vscale x 8 x s16>) = G_ANYEXT %1(<vscale x 8 x s1>) + $v8m2 = COPY %0(<vscale x 8 x s16>) + PseudoRET implicit $v8m2 +... 
+--- +name: anyext_nxv8i32_nxv8i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv8i32_nxv8i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 8 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: anyext_nxv8i32_nxv8i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 8 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 8 x s1>) = COPY $v0 + %0:_(<vscale x 8 x s32>) = G_ANYEXT %1(<vscale x 8 x s1>) + $v8m4 = COPY %0(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 +... 
+--- +name: anyext_nxv8i64_nxv8i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv8i64_nxv8i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[MV]](s64) + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[MV1]](s64) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 8 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: anyext_nxv8i64_nxv8i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[C]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[C1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 8 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 8 x s1>) = COPY $v0 + %0:_(<vscale x 8 x s64>) = G_ANYEXT %1(<vscale x 8 x s1>) + $v8m8 = COPY %0(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 +... 
+--- +name: anyext_nxv16i8_nxv16i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv16i8_nxv16i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 16 x s8>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: anyext_nxv16i8_nxv16i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 16 x s8>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 16 x s1>) = COPY $v0 + %0:_(<vscale x 16 x s8>) = G_ANYEXT %1(<vscale x 16 x s1>) + $v8m2 = COPY %0(<vscale x 16 x s8>) + PseudoRET implicit $v8m2 +... 
+--- +name: anyext_nxv16i16_nxv16i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv16i16_nxv16i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 16 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: anyext_nxv16i16_nxv16i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 16 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 16 x s1>) = COPY $v0 + %0:_(<vscale x 16 x s16>) = G_ANYEXT %1(<vscale x 16 x s1>) + $v8m4 = COPY %0(<vscale x 16 x s16>) + PseudoRET implicit $v8m4 +... 
+--- +name: anyext_nxv16i32_nxv16i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv16i32_nxv16i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 16 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: anyext_nxv16i32_nxv16i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 16 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 16 x s1>) = COPY $v0 + %0:_(<vscale x 16 x s32>) = G_ANYEXT %1(<vscale x 16 x s1>) + $v8m8 = COPY %0(<vscale x 16 x s32>) + PseudoRET implicit $v8m8 +... 
+--- +name: anyext_nxv32i8_nxv32i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv32i8_nxv32i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SELECT [[COPY]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 32 x s8>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: anyext_nxv32i8_nxv32i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SELECT [[COPY]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 32 x s8>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 32 x s1>) = COPY $v0 + %0:_(<vscale x 32 x s8>) = G_ANYEXT %1(<vscale x 32 x s1>) + $v8m4 = COPY %0(<vscale x 32 x s8>) + PseudoRET implicit $v8m4 +... 
+--- +name: anyext_nxv32i16_nxv32i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv32i16_nxv32i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SELECT [[COPY]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 32 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: anyext_nxv32i16_nxv32i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SELECT [[COPY]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 32 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 32 x s1>) = COPY $v0 + %0:_(<vscale x 32 x s16>) = G_ANYEXT %1(<vscale x 32 x s1>) + $v8m8 = COPY %0(<vscale x 32 x s16>) + PseudoRET implicit $v8m8 +... 
+--- +name: anyext_nxv64i8_nxv64i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv64i8_nxv64i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 64 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SELECT [[COPY]](<vscale x 64 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 64 x s8>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: anyext_nxv64i8_nxv64i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 64 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SELECT [[COPY]](<vscale x 64 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 64 x s8>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 64 x s1>) = COPY $v0 + %0:_(<vscale x 64 x s8>) = G_ANYEXT %1(<vscale x 64 x s1>) + $v8m8 = COPY %0(<vscale x 64 x s8>) + PseudoRET implicit $v8m8 +... 
+ +# Extend from s8 element vectors +--- +name: anyext_nxv1i16_nxv1i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv1i16_nxv1i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 1 x s16>) = G_ANYEXT [[COPY]](<vscale x 1 x s8>) + ; RV32-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv1i16_nxv1i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 1 x s16>) = G_ANYEXT [[COPY]](<vscale x 1 x s8>) + ; RV64-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s8>) = COPY $v8 + %0:_(<vscale x 1 x s16>) = G_ANYEXT %1(<vscale x 1 x s8>) + $v8 = COPY %0(<vscale x 1 x s16>) + PseudoRET implicit $v8 +... +--- +name: anyext_nxv1i32_nxv1i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv1i32_nxv1i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_ANYEXT [[COPY]](<vscale x 1 x s8>) + ; RV32-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv1i32_nxv1i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_ANYEXT [[COPY]](<vscale x 1 x s8>) + ; RV64-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s8>) = COPY $v8 + %0:_(<vscale x 1 x s32>) = G_ANYEXT %1(<vscale x 1 x s8>) + $v8 = COPY %0(<vscale x 1 x s32>) + PseudoRET implicit $v8 +... 
+--- +name: anyext_nxv1i64_nxv1i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv1i64_nxv1i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_ANYEXT [[COPY]](<vscale x 1 x s8>) + ; RV32-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv1i64_nxv1i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_ANYEXT [[COPY]](<vscale x 1 x s8>) + ; RV64-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s8>) = COPY $v8 + %0:_(<vscale x 1 x s64>) = G_ANYEXT %1(<vscale x 1 x s8>) + $v8 = COPY %0(<vscale x 1 x s64>) + PseudoRET implicit $v8 +... +--- +name: anyext_nxv2i16_nxv2i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv2i16_nxv2i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 2 x s16>) = G_ANYEXT [[COPY]](<vscale x 2 x s8>) + ; RV32-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 2 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv2i16_nxv2i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 2 x s16>) = G_ANYEXT [[COPY]](<vscale x 2 x s8>) + ; RV64-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 2 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s8>) = COPY $v8 + %0:_(<vscale x 2 x s16>) = G_ANYEXT %1(<vscale x 2 x s8>) + $v8 = COPY %0(<vscale x 2 x s16>) + PseudoRET implicit $v8 +... 
+--- +name: anyext_nxv2i32_nxv2i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv2i32_nxv2i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_ANYEXT [[COPY]](<vscale x 2 x s8>) + ; RV32-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 2 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv2i32_nxv2i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_ANYEXT [[COPY]](<vscale x 2 x s8>) + ; RV64-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 2 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s8>) = COPY $v8 + %0:_(<vscale x 2 x s32>) = G_ANYEXT %1(<vscale x 2 x s8>) + $v8 = COPY %0(<vscale x 2 x s32>) + PseudoRET implicit $v8 +... +--- +name: anyext_nxv2i64_nxv2i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv2i64_nxv2i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_ANYEXT [[COPY]](<vscale x 2 x s8>) + ; RV32-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 2 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: anyext_nxv2i64_nxv2i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_ANYEXT [[COPY]](<vscale x 2 x s8>) + ; RV64-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 2 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 2 x s8>) = COPY $v8 + %0:_(<vscale x 2 x s64>) = G_ANYEXT %1(<vscale x 2 x s8>) + $v8m2 = COPY %0(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 +... 
+--- +name: anyext_nxv4i16_nxv4i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv4i16_nxv4i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 4 x s16>) = G_ANYEXT [[COPY]](<vscale x 4 x s8>) + ; RV32-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 4 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv4i16_nxv4i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 4 x s16>) = G_ANYEXT [[COPY]](<vscale x 4 x s8>) + ; RV64-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 4 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 4 x s8>) = COPY $v8 + %0:_(<vscale x 4 x s16>) = G_ANYEXT %1(<vscale x 4 x s8>) + $v8 = COPY %0(<vscale x 4 x s16>) + PseudoRET implicit $v8 +... +--- +name: anyext_nxv4i32_nxv4i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv4i32_nxv4i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_ANYEXT [[COPY]](<vscale x 4 x s8>) + ; RV32-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 4 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: anyext_nxv4i32_nxv4i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_ANYEXT [[COPY]](<vscale x 4 x s8>) + ; RV64-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 4 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 4 x s8>) = COPY $v8 + %0:_(<vscale x 4 x s32>) = G_ANYEXT %1(<vscale x 4 x s8>) + $v8m2 = COPY %0(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 +... 
+--- +name: anyext_nxv4i64_nxv4i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv4i64_nxv4i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_ANYEXT [[COPY]](<vscale x 4 x s8>) + ; RV32-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 4 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: anyext_nxv4i64_nxv4i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_ANYEXT [[COPY]](<vscale x 4 x s8>) + ; RV64-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 4 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 4 x s8>) = COPY $v8 + %0:_(<vscale x 4 x s64>) = G_ANYEXT %1(<vscale x 4 x s8>) + $v8m4 = COPY %0(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 +... +--- +name: anyext_nxv8i16_nxv8i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv8i16_nxv8i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 8 x s16>) = G_ANYEXT [[COPY]](<vscale x 8 x s8>) + ; RV32-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 8 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: anyext_nxv8i16_nxv8i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 8 x s16>) = G_ANYEXT [[COPY]](<vscale x 8 x s8>) + ; RV64-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 8 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 8 x s8>) = COPY $v8 + %0:_(<vscale x 8 x s16>) = G_ANYEXT %1(<vscale x 8 x s8>) + $v8m2 = COPY %0(<vscale x 8 x s16>) + PseudoRET implicit $v8m2 +... 
+--- +name: anyext_nxv8i32_nxv8i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv8i32_nxv8i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_ANYEXT [[COPY]](<vscale x 8 x s8>) + ; RV32-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 8 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: anyext_nxv8i32_nxv8i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_ANYEXT [[COPY]](<vscale x 8 x s8>) + ; RV64-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 8 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 8 x s8>) = COPY $v8 + %0:_(<vscale x 8 x s32>) = G_ANYEXT %1(<vscale x 8 x s8>) + $v8m4 = COPY %0(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 +... +--- +name: anyext_nxv8i64_nxv8i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv8i64_nxv8i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_ANYEXT [[COPY]](<vscale x 8 x s8>) + ; RV32-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 8 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: anyext_nxv8i64_nxv8i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_ANYEXT [[COPY]](<vscale x 8 x s8>) + ; RV64-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 8 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 8 x s8>) = COPY $v8 + %0:_(<vscale x 8 x s64>) = G_ANYEXT %1(<vscale x 8 x s8>) + $v8m8 = COPY %0(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 +... 
+--- +name: anyext_nxv16i16_nxv16i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv16i16_nxv16i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v8m2 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 16 x s16>) = G_ANYEXT [[COPY]](<vscale x 16 x s8>) + ; RV32-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 16 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: anyext_nxv16i16_nxv16i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v8m2 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 16 x s16>) = G_ANYEXT [[COPY]](<vscale x 16 x s8>) + ; RV64-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 16 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 16 x s8>) = COPY $v8m2 + %0:_(<vscale x 16 x s16>) = G_ANYEXT %1(<vscale x 16 x s8>) + $v8m4 = COPY %0(<vscale x 16 x s16>) + PseudoRET implicit $v8m4 +... 
+--- +name: anyext_nxv16i32_nxv16i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv16i32_nxv16i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v8m4 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_ANYEXT [[COPY]](<vscale x 16 x s8>) + ; RV32-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 16 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: anyext_nxv16i32_nxv16i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v8m4 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_ANYEXT [[COPY]](<vscale x 16 x s8>) + ; RV64-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 16 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 16 x s8>) = COPY $v8m4 + %0:_(<vscale x 16 x s32>) = G_ANYEXT %1(<vscale x 16 x s8>) + $v8m8 = COPY %0(<vscale x 16 x s32>) + PseudoRET implicit $v8m8 +... 
+--- +name: anyext_nxv32i16_nxv32i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv32i16_nxv32i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s8>) = COPY $v8m4 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 32 x s16>) = G_ANYEXT [[COPY]](<vscale x 32 x s8>) + ; RV32-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 32 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: anyext_nxv32i16_nxv32i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s8>) = COPY $v8m4 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 32 x s16>) = G_ANYEXT [[COPY]](<vscale x 32 x s8>) + ; RV64-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 32 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 32 x s8>) = COPY $v8m4 + %0:_(<vscale x 32 x s16>) = G_ANYEXT %1(<vscale x 32 x s8>) + $v8m8 = COPY %0(<vscale x 32 x s16>) + PseudoRET implicit $v8m8 +... 
+ +# Extend from s16 element vectors +--- +name: anyext_nxv1i32_nxv1i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv1i32_nxv1i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_ANYEXT [[COPY]](<vscale x 1 x s16>) + ; RV32-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv1i32_nxv1i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_ANYEXT [[COPY]](<vscale x 1 x s16>) + ; RV64-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s16>) = COPY $v8 + %0:_(<vscale x 1 x s32>) = G_ANYEXT %1(<vscale x 1 x s16>) + $v8 = COPY %0(<vscale x 1 x s32>) + PseudoRET implicit $v8 +... +--- +name: anyext_nxv1i64_nxv1i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv1i64_nxv1i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_ANYEXT [[COPY]](<vscale x 1 x s16>) + ; RV32-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv1i64_nxv1i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_ANYEXT [[COPY]](<vscale x 1 x s16>) + ; RV64-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s16>) = COPY $v8 + %0:_(<vscale x 1 x s64>) = G_ANYEXT %1(<vscale x 1 x s16>) + $v8 = COPY %0(<vscale x 1 x s64>) + PseudoRET implicit $v8 +... 
+--- +name: anyext_nxv2i32_nxv2i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv2i32_nxv2i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_ANYEXT [[COPY]](<vscale x 2 x s16>) + ; RV32-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 2 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv2i32_nxv2i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_ANYEXT [[COPY]](<vscale x 2 x s16>) + ; RV64-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 2 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s16>) = COPY $v8 + %0:_(<vscale x 2 x s32>) = G_ANYEXT %1(<vscale x 2 x s16>) + $v8 = COPY %0(<vscale x 2 x s32>) + PseudoRET implicit $v8 +... +--- +name: anyext_nxv2i64_nxv2i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv2i64_nxv2i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_ANYEXT [[COPY]](<vscale x 2 x s16>) + ; RV32-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 2 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: anyext_nxv2i64_nxv2i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_ANYEXT [[COPY]](<vscale x 2 x s16>) + ; RV64-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 2 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 2 x s16>) = COPY $v8 + %0:_(<vscale x 2 x s64>) = G_ANYEXT %1(<vscale x 2 x s16>) + $v8m2 = COPY %0(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 +... 
+--- +name: anyext_nxv4i32_nxv4i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv4i32_nxv4i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_ANYEXT [[COPY]](<vscale x 4 x s16>) + ; RV32-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 4 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: anyext_nxv4i32_nxv4i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_ANYEXT [[COPY]](<vscale x 4 x s16>) + ; RV64-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 4 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 4 x s16>) = COPY $v8 + %0:_(<vscale x 4 x s32>) = G_ANYEXT %1(<vscale x 4 x s16>) + $v8m2 = COPY %0(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 +... +--- +name: anyext_nxv4i64_nxv4i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv4i64_nxv4i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_ANYEXT [[COPY]](<vscale x 4 x s16>) + ; RV32-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 4 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: anyext_nxv4i64_nxv4i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_ANYEXT [[COPY]](<vscale x 4 x s16>) + ; RV64-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 4 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 4 x s16>) = COPY $v8 + %0:_(<vscale x 4 x s64>) = G_ANYEXT %1(<vscale x 4 x s16>) + $v8m4 = COPY %0(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 +... 
+--- +name: anyext_nxv8i32_nxv8i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv8i32_nxv8i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v8m2 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_ANYEXT [[COPY]](<vscale x 8 x s16>) + ; RV32-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 8 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: anyext_nxv8i32_nxv8i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v8m2 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_ANYEXT [[COPY]](<vscale x 8 x s16>) + ; RV64-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 8 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 8 x s16>) = COPY $v8m2 + %0:_(<vscale x 8 x s32>) = G_ANYEXT %1(<vscale x 8 x s16>) + $v8m4 = COPY %0(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 +... +--- +name: anyext_nxv8i64_nxv8i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv8i64_nxv8i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v8m2 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_ANYEXT [[COPY]](<vscale x 8 x s16>) + ; RV32-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 8 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: anyext_nxv8i64_nxv8i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v8m2 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_ANYEXT [[COPY]](<vscale x 8 x s16>) + ; RV64-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 8 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 8 x s16>) = COPY $v8m2 + %0:_(<vscale x 8 x s64>) = G_ANYEXT %1(<vscale x 8 x s16>) + $v8m8 = COPY %0(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 +... 
+--- +name: anyext_nxv16i32_nxv16i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv16i32_nxv16i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s16>) = COPY $v8m4 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_ANYEXT [[COPY]](<vscale x 16 x s16>) + ; RV32-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 16 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: anyext_nxv16i32_nxv16i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s16>) = COPY $v8m4 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_ANYEXT [[COPY]](<vscale x 16 x s16>) + ; RV64-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 16 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 16 x s16>) = COPY $v8m4 + %0:_(<vscale x 16 x s32>) = G_ANYEXT %1(<vscale x 16 x s16>) + $v8m8 = COPY %0(<vscale x 16 x s32>) + PseudoRET implicit $v8m8 +... 
+ +# Extend from s32 element vectors +--- +name: anyext_nxv1i64_nxv1i32 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv1i64_nxv1i32 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s32>) = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_ANYEXT [[COPY]](<vscale x 1 x s32>) + ; RV32-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv1i64_nxv1i32 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s32>) = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_ANYEXT [[COPY]](<vscale x 1 x s32>) + ; RV64-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s32>) = COPY $v8 + %0:_(<vscale x 1 x s64>) = G_ANYEXT %1(<vscale x 1 x s32>) + $v8 = COPY %0(<vscale x 1 x s64>) + PseudoRET implicit $v8 +... 
+--- +name: anyext_nxv2i64_nxv2i32 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv2i64_nxv2i32 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s32>) = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_ANYEXT [[COPY]](<vscale x 2 x s32>) + ; RV32-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 2 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: anyext_nxv2i64_nxv2i32 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s32>) = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_ANYEXT [[COPY]](<vscale x 2 x s32>) + ; RV64-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 2 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 2 x s32>) = COPY $v8 + %0:_(<vscale x 2 x s64>) = G_ANYEXT %1(<vscale x 2 x s32>) + $v8m2 = COPY %0(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 +... +--- +name: anyext_nxv4i64_nxv4i32 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv4i64_nxv4i32 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $v8m2 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_ANYEXT [[COPY]](<vscale x 4 x s32>) + ; RV32-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 4 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: anyext_nxv4i64_nxv4i32 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $v8m2 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_ANYEXT [[COPY]](<vscale x 4 x s32>) + ; RV64-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 4 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 4 x s32>) = COPY $v8m2 + %0:_(<vscale x 4 x s64>) = G_ANYEXT %1(<vscale x 4 x s32>) + $v8m4 = COPY %0(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 +... 
+--- +name: anyext_nxv8i64_nxv8i32 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv8i64_nxv8i32 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s32>) = COPY $v8m4 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_ANYEXT [[COPY]](<vscale x 8 x s32>) + ; RV32-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 8 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: anyext_nxv8i64_nxv8i32 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s32>) = COPY $v8m4 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_ANYEXT [[COPY]](<vscale x 8 x s32>) + ; RV64-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 8 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 8 x s32>) = COPY $v8m4 + %0:_(<vscale x 8 x s64>) = G_ANYEXT %1(<vscale x 8 x s32>) + $v8m8 = COPY %0(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 +... diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-icmp.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-icmp.mir new file mode 100644 index 0000000..d1df954 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-icmp.mir @@ -0,0 +1,810 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck --check-prefix=RV32 %s +# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer %s -o - | FileCheck --check-prefix=RV64 %s + +--- +name: icmp_nxv1i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv1i1 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: 
[[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SELECT [[DEF]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[C2]](s32) + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[C3]](s32) + ; RV32-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SELECT [[DEF]](<vscale x 1 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[SELECT]](<vscale x 1 x s8>), [[SELECT1]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv1i1 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SELECT [[DEF]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64) + ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[ANYEXT3]](s64) + ; RV64-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 1 x s8>) = 
G_SELECT [[DEF]](<vscale x 1 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[SELECT]](<vscale x 1 x s8>), [[SELECT1]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF + %1:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 1 x s1>) + PseudoRET implicit $v8 +... +--- +name: icmp_nxv2i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv2i1 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[DEF]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C2]](s32) + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C3]](s32) + ; RV32-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[DEF]](<vscale x 2 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[SELECT]](<vscale x 2 x s8>), [[SELECT1]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv2i1 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR 
[[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[DEF]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64) + ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT3]](s64) + ; RV64-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[DEF]](<vscale x 2 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[SELECT]](<vscale x 2 x s8>), [[SELECT1]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF + %1:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 2 x s1>) + PseudoRET implicit $v8 +... 
+--- +name: icmp_nxv4i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv4i1 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[DEF]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C2]](s32) + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C3]](s32) + ; RV32-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[DEF]](<vscale x 4 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[SELECT]](<vscale x 4 x s8>), [[SELECT1]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv4i1 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[DEF]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: 
[[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64) + ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT3]](s64) + ; RV64-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[DEF]](<vscale x 4 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[SELECT]](<vscale x 4 x s8>), [[SELECT1]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF + %1:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 4 x s1>) + PseudoRET implicit $v8 +... +--- +name: icmp_nxv8i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv8i1 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[DEF]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C2]](s32) + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C3]](s32) + ; RV32-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[DEF]](<vscale x 8 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), 
[[SELECT]](<vscale x 8 x s8>), [[SELECT1]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv8i1 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[DEF]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64) + ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT3]](s64) + ; RV64-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[DEF]](<vscale x 8 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[SELECT]](<vscale x 8 x s8>), [[SELECT1]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF + %1:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 8 x s1>) + PseudoRET implicit $v8 +... 
+--- +name: icmp_nxv16i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv16i1 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SELECT [[DEF]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[C2]](s32) + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[C3]](s32) + ; RV32-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SELECT [[DEF]](<vscale x 16 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[SELECT]](<vscale x 16 x s8>), [[SELECT1]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv16i1 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SELECT [[DEF]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: 
[[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64) + ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[ANYEXT3]](s64) + ; RV64-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SELECT [[DEF]](<vscale x 16 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[SELECT]](<vscale x 16 x s8>), [[SELECT1]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF + %1:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 16 x s1>) + PseudoRET implicit $v8 +... +--- +name: icmp_nxv32i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv32i1 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 32 x s1>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SELECT [[DEF]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[C2]](s32) + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[C3]](s32) + ; RV32-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SELECT [[DEF]](<vscale x 32 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 32 x s1>) = G_ICMP 
intpred(sgt), [[SELECT]](<vscale x 32 x s8>), [[SELECT1]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 32 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv32i1 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 32 x s1>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SELECT [[DEF]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64) + ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[ANYEXT3]](s64) + ; RV64-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SELECT [[DEF]](<vscale x 32 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 32 x s1>) = G_ICMP intpred(sgt), [[SELECT]](<vscale x 32 x s8>), [[SELECT1]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 32 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 32 x s1>) = G_IMPLICIT_DEF + %1:_(<vscale x 32 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 32 x s1>) + PseudoRET implicit $v8 +... 
+--- +name: icmp_nxv64i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv64i1 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SELECT [[DEF]](<vscale x 64 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[C2]](s32) + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[C3]](s32) + ; RV32-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SELECT [[DEF]](<vscale x 64 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 64 x s1>) = G_ICMP intpred(sgt), [[SELECT]](<vscale x 64 x s8>), [[SELECT1]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 64 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv64i1 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SELECT [[DEF]](<vscale x 64 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: 
[[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64) + ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT3]](s64) + ; RV64-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SELECT [[DEF]](<vscale x 64 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 64 x s1>) = G_ICMP intpred(sgt), [[SELECT]](<vscale x 64 x s8>), [[SELECT1]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 64 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF + %1:_(<vscale x 64 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 64 x s1>) + PseudoRET implicit $v8 +... +--- +name: icmp_nxv1i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv1i8 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s8>), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv1i8 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s8>), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF + %1:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 1 x s1>) + PseudoRET implicit $v8 +... 
+--- +name: icmp_nxv2i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv2i8 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s8>), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv2i8 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s8>), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF + %1:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 2 x s1>) + PseudoRET implicit $v8 +... +--- +name: icmp_nxv4i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv4i8 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s8>), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv4i8 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s8>), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF + %1:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 4 x s1>) + PseudoRET implicit $v8 +... 
+--- +name: icmp_nxv8i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv8i8 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s8>), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv8i8 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s8>), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF + %1:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 8 x s1>) + PseudoRET implicit $v8 +... +--- +name: icmp_nxv16i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv16i8 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s8>), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv16i8 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s8>), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF + %1:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 16 x s1>) + PseudoRET implicit $v8 +... 
+--- +name: icmp_nxv32i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv32i8 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 32 x s8>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 32 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 32 x s8>), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 32 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv32i8 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 32 x s8>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 32 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 32 x s8>), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 32 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 32 x s8>) = G_IMPLICIT_DEF + %1:_(<vscale x 32 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 32 x s1>) + PseudoRET implicit $v8 +... +--- +name: icmp_nxv64i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv64i8 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 64 x s8>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 64 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 64 x s8>), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 64 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv64i8 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 64 x s8>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 64 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 64 x s8>), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 64 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 64 x s8>) = G_IMPLICIT_DEF + %1:_(<vscale x 64 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 64 x s1>) + PseudoRET implicit $v8 +... 
+--- +name: icmp_nxv1i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv1i16 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s16>), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv1i16 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s16>), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF + %1:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 1 x s1>) + PseudoRET implicit $v8 +... +--- +name: icmp_nxv2i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv2i16 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s16>), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv2i16 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s16>), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF + %1:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 2 x s1>) + PseudoRET implicit $v8 +... 
+--- +name: icmp_nxv4i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv4i16 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s16>), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv4i16 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s16>), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF + %1:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 4 x s1>) + PseudoRET implicit $v8 +... +--- +name: icmp_nxv8i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv8i16 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s16>), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv8i16 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s16>), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF + %1:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 8 x s1>) + PseudoRET implicit $v8 +... 
+--- +name: icmp_nxv16i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv16i16 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s16>), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv16i16 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s16>), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF + %1:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 16 x s1>) + PseudoRET implicit $v8 +... +--- +name: icmp_nxv32i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv32i16 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 32 x s16>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 32 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 32 x s16>), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 32 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv32i16 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 32 x s16>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 32 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 32 x s16>), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 32 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 32 x s16>) = G_IMPLICIT_DEF + %1:_(<vscale x 32 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 32 x s1>) + PseudoRET implicit $v8 +... 
+--- +name: icmp_nxv1i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv1i32 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s32>), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv1i32 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s32>), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF + %1:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 1 x s1>) + PseudoRET implicit $v8 +... +--- +name: icmp_nxv2i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv2i32 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s32>), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv2i32 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s32>), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF + %1:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 2 x s1>) + PseudoRET implicit $v8 +... 
+--- +name: icmp_nxv4i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv4i32 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s32>), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv4i32 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s32>), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF + %1:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 4 x s1>) + PseudoRET implicit $v8 +... +--- +name: icmp_nxv8i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv8i32 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s32>), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv8i32 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s32>), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF + %1:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 8 x s1>) + PseudoRET implicit $v8 +... 
+--- +name: icmp_nxv16i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv16i32 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s32>), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv16i32 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s32>), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF + %1:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 16 x s1>) + PseudoRET implicit $v8 +... +--- +name: icmp_nxv1i64 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv1i64 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s64>), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv1i64 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s64>), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF + %1:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 1 x s1>) + PseudoRET implicit $v8 +... 
+--- +name: icmp_nxv2i64 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv2i64 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s64>), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv2i64 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s64>), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF + %1:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 2 x s1>) + PseudoRET implicit $v8 +... +--- +name: icmp_nxv4i64 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv4i64 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s64>), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv4i64 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s64>), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF + %1:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 4 x s1>) + PseudoRET implicit $v8 +... 
+--- +name: icmp_nxv8i64 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv8i64 + ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s64>), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv8i64 + ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s64>), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>) + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF + %1:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1(<vscale x 8 x s1>) + PseudoRET implicit $v8 +... diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-sext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-sext.mir new file mode 100644 index 0000000..1571daf --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-sext.mir @@ -0,0 +1,1589 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck --check-prefix=RV32 %s +# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer %s -o - | FileCheck --check-prefix=RV64 %s + +# Extend from s1 element vectors +--- +name: sext_nxv1i8_nxv1i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv1i8_nxv1i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: 
[[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s8>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv1i8_nxv1i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s8>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s1>) = COPY $v0 + %0:_(<vscale x 1 x s8>) = G_SEXT %1(<vscale x 1 x s1>) + $v8 = COPY %0(<vscale x 1 x s8>) + PseudoRET implicit $v8 +... 
+--- +name: sext_nxv1i16_nxv1i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv1i16_nxv1i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv1i16_nxv1i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s1>) = COPY $v0 + %0:_(<vscale x 1 x s16>) = G_SEXT %1(<vscale x 1 x s1>) + $v8 = COPY %0(<vscale x 1 x s16>) + PseudoRET implicit $v8 +... 
+--- +name: sext_nxv1i32_nxv1i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv1i32_nxv1i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv1i32_nxv1i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s1>) = COPY $v0 + %0:_(<vscale x 1 x s32>) = G_SEXT %1(<vscale x 1 x s1>) + $v8 = COPY %0(<vscale x 1 x s32>) + PseudoRET implicit $v8 +... 
+--- +name: sext_nxv1i64_nxv1i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv1i64_nxv1i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[MV]](s64) + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[MV1]](s64) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv1i64_nxv1i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[C]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[C1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s1>) = COPY $v0 + %0:_(<vscale x 1 x s64>) = G_SEXT %1(<vscale x 1 x s1>) + $v8 = COPY %0(<vscale x 1 x s64>) + PseudoRET implicit $v8 +... 
+--- +name: sext_nxv2i8_nxv2i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv2i8_nxv2i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s8>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv2i8_nxv2i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s8>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s1>) = COPY $v0 + %0:_(<vscale x 2 x s8>) = G_SEXT %1(<vscale x 2 x s1>) + $v8 = COPY %0(<vscale x 2 x s8>) + PseudoRET implicit $v8 +... 
+--- +name: sext_nxv2i16_nxv2i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv2i16_nxv2i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv2i16_nxv2i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s1>) = COPY $v0 + %0:_(<vscale x 2 x s16>) = G_SEXT %1(<vscale x 2 x s1>) + $v8 = COPY %0(<vscale x 2 x s16>) + PseudoRET implicit $v8 +... 
+--- +name: sext_nxv2i32_nxv2i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv2i32_nxv2i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv2i32_nxv2i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s1>) = COPY $v0 + %0:_(<vscale x 2 x s32>) = G_SEXT %1(<vscale x 2 x s1>) + $v8 = COPY %0(<vscale x 2 x s32>) + PseudoRET implicit $v8 +... 
+--- +name: sext_nxv2i64_nxv2i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv2i64_nxv2i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[MV]](s64) + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[MV1]](s64) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 2 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: sext_nxv2i64_nxv2i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[C]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[C1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 2 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 2 x s1>) = COPY $v0 + %0:_(<vscale x 2 x s64>) = G_SEXT %1(<vscale x 2 x s1>) + $v8m2 = COPY %0(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 +... 
+--- +name: sext_nxv4i8_nxv4i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv4i8_nxv4i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 4 x s8>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv4i8_nxv4i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 4 x s8>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 4 x s1>) = COPY $v0 + %0:_(<vscale x 4 x s8>) = G_SEXT %1(<vscale x 4 x s1>) + $v8 = COPY %0(<vscale x 4 x s8>) + PseudoRET implicit $v8 +... 
+--- +name: sext_nxv4i16_nxv4i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv4i16_nxv4i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 4 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv4i16_nxv4i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 4 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 4 x s1>) = COPY $v0 + %0:_(<vscale x 4 x s16>) = G_SEXT %1(<vscale x 4 x s1>) + $v8 = COPY %0(<vscale x 4 x s16>) + PseudoRET implicit $v8 +... 
+--- +name: sext_nxv4i32_nxv4i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv4i32_nxv4i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 4 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: sext_nxv4i32_nxv4i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 4 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 4 x s1>) = COPY $v0 + %0:_(<vscale x 4 x s32>) = G_SEXT %1(<vscale x 4 x s1>) + $v8m2 = COPY %0(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 +... 
+--- +name: sext_nxv4i64_nxv4i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv4i64_nxv4i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[MV]](s64) + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[MV1]](s64) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 4 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: sext_nxv4i64_nxv4i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[C]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[C1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 4 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 4 x s1>) = COPY $v0 + %0:_(<vscale x 4 x s64>) = G_SEXT %1(<vscale x 4 x s1>) + $v8m4 = COPY %0(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 +... 
+--- +name: sext_nxv8i8_nxv8i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv8i8_nxv8i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 8 x s8>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv8i8_nxv8i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 8 x s8>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 8 x s1>) = COPY $v0 + %0:_(<vscale x 8 x s8>) = G_SEXT %1(<vscale x 8 x s1>) + $v8 = COPY %0(<vscale x 8 x s8>) + PseudoRET implicit $v8 +... 
+--- +name: sext_nxv8i16_nxv8i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv8i16_nxv8i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 8 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: sext_nxv8i16_nxv8i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 8 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 8 x s1>) = COPY $v0 + %0:_(<vscale x 8 x s16>) = G_SEXT %1(<vscale x 8 x s1>) + $v8m2 = COPY %0(<vscale x 8 x s16>) + PseudoRET implicit $v8m2 +... 
+--- +name: sext_nxv8i32_nxv8i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv8i32_nxv8i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 8 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: sext_nxv8i32_nxv8i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 8 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 8 x s1>) = COPY $v0 + %0:_(<vscale x 8 x s32>) = G_SEXT %1(<vscale x 8 x s1>) + $v8m4 = COPY %0(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 +... 
+--- +name: sext_nxv8i64_nxv8i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv8i64_nxv8i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[MV]](s64) + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[MV1]](s64) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 8 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: sext_nxv8i64_nxv8i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[C]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[C1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 8 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 8 x s1>) = COPY $v0 + %0:_(<vscale x 8 x s64>) = G_SEXT %1(<vscale x 8 x s1>) + $v8m8 = COPY %0(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 +... 
+--- +name: sext_nxv16i8_nxv16i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv16i8_nxv16i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 16 x s8>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: sext_nxv16i8_nxv16i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 16 x s8>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 16 x s1>) = COPY $v0 + %0:_(<vscale x 16 x s8>) = G_SEXT %1(<vscale x 16 x s1>) + $v8m2 = COPY %0(<vscale x 16 x s8>) + PseudoRET implicit $v8m2 +... 
+--- +name: sext_nxv16i16_nxv16i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv16i16_nxv16i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 16 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: sext_nxv16i16_nxv16i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 16 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 16 x s1>) = COPY $v0 + %0:_(<vscale x 16 x s16>) = G_SEXT %1(<vscale x 16 x s1>) + $v8m4 = COPY %0(<vscale x 16 x s16>) + PseudoRET implicit $v8m4 +... 
+--- +name: sext_nxv16i32_nxv16i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv16i32_nxv16i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 16 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: sext_nxv16i32_nxv16i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 16 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 16 x s1>) = COPY $v0 + %0:_(<vscale x 16 x s32>) = G_SEXT %1(<vscale x 16 x s1>) + $v8m8 = COPY %0(<vscale x 16 x s32>) + PseudoRET implicit $v8m8 +... 
+--- +name: sext_nxv32i8_nxv32i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv32i8_nxv32i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SELECT [[COPY]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 32 x s8>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: sext_nxv32i8_nxv32i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SELECT [[COPY]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 32 x s8>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 32 x s1>) = COPY $v0 + %0:_(<vscale x 32 x s8>) = G_SEXT %1(<vscale x 32 x s1>) + $v8m4 = COPY %0(<vscale x 32 x s8>) + PseudoRET implicit $v8m4 +... 
+--- +name: sext_nxv32i16_nxv32i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv32i16_nxv32i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SELECT [[COPY]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 32 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: sext_nxv32i16_nxv32i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SELECT [[COPY]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 32 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 32 x s1>) = COPY $v0 + %0:_(<vscale x 32 x s16>) = G_SEXT %1(<vscale x 32 x s1>) + $v8m8 = COPY %0(<vscale x 32 x s16>) + PseudoRET implicit $v8m8 +... 
+--- +name: sext_nxv64i8_nxv64i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv64i8_nxv64i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 64 x s1>) = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SELECT [[COPY]](<vscale x 64 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 64 x s8>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: sext_nxv64i8_nxv64i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 64 x s1>) = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SELECT [[COPY]](<vscale x 64 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 64 x s8>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 64 x s1>) = COPY $v0 + %0:_(<vscale x 64 x s8>) = G_SEXT %1(<vscale x 64 x s1>) + $v8m8 = COPY %0(<vscale x 64 x s8>) + PseudoRET implicit $v8m8 +... 
+ +# Extend from s8 element vectors +--- +name: sext_nxv1i16_nxv1i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv1i16_nxv1i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SEXT [[COPY]](<vscale x 1 x s8>) + ; RV32-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv1i16_nxv1i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SEXT [[COPY]](<vscale x 1 x s8>) + ; RV64-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s8>) = COPY $v8 + %0:_(<vscale x 1 x s16>) = G_SEXT %1(<vscale x 1 x s8>) + $v8 = COPY %0(<vscale x 1 x s16>) + PseudoRET implicit $v8 +... +--- +name: sext_nxv1i32_nxv1i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv1i32_nxv1i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SEXT [[COPY]](<vscale x 1 x s8>) + ; RV32-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv1i32_nxv1i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SEXT [[COPY]](<vscale x 1 x s8>) + ; RV64-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s8>) = COPY $v8 + %0:_(<vscale x 1 x s32>) = G_SEXT %1(<vscale x 1 x s8>) + $v8 = COPY %0(<vscale x 1 x s32>) + PseudoRET implicit $v8 +... 
+--- +name: sext_nxv1i64_nxv1i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv1i64_nxv1i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SEXT [[COPY]](<vscale x 1 x s8>) + ; RV32-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv1i64_nxv1i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SEXT [[COPY]](<vscale x 1 x s8>) + ; RV64-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s8>) = COPY $v8 + %0:_(<vscale x 1 x s64>) = G_SEXT %1(<vscale x 1 x s8>) + $v8 = COPY %0(<vscale x 1 x s64>) + PseudoRET implicit $v8 +... +--- +name: sext_nxv2i16_nxv2i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv2i16_nxv2i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SEXT [[COPY]](<vscale x 2 x s8>) + ; RV32-NEXT: $v8 = COPY [[SEXT]](<vscale x 2 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv2i16_nxv2i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SEXT [[COPY]](<vscale x 2 x s8>) + ; RV64-NEXT: $v8 = COPY [[SEXT]](<vscale x 2 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s8>) = COPY $v8 + %0:_(<vscale x 2 x s16>) = G_SEXT %1(<vscale x 2 x s8>) + $v8 = COPY %0(<vscale x 2 x s16>) + PseudoRET implicit $v8 +... 
+--- +name: sext_nxv2i32_nxv2i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv2i32_nxv2i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SEXT [[COPY]](<vscale x 2 x s8>) + ; RV32-NEXT: $v8 = COPY [[SEXT]](<vscale x 2 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv2i32_nxv2i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SEXT [[COPY]](<vscale x 2 x s8>) + ; RV64-NEXT: $v8 = COPY [[SEXT]](<vscale x 2 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s8>) = COPY $v8 + %0:_(<vscale x 2 x s32>) = G_SEXT %1(<vscale x 2 x s8>) + $v8 = COPY %0(<vscale x 2 x s32>) + PseudoRET implicit $v8 +... +--- +name: sext_nxv2i64_nxv2i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv2i64_nxv2i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SEXT [[COPY]](<vscale x 2 x s8>) + ; RV32-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 2 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: sext_nxv2i64_nxv2i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SEXT [[COPY]](<vscale x 2 x s8>) + ; RV64-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 2 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 2 x s8>) = COPY $v8 + %0:_(<vscale x 2 x s64>) = G_SEXT %1(<vscale x 2 x s8>) + $v8m2 = COPY %0(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 +... 
+--- +name: sext_nxv4i16_nxv4i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv4i16_nxv4i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SEXT [[COPY]](<vscale x 4 x s8>) + ; RV32-NEXT: $v8 = COPY [[SEXT]](<vscale x 4 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv4i16_nxv4i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SEXT [[COPY]](<vscale x 4 x s8>) + ; RV64-NEXT: $v8 = COPY [[SEXT]](<vscale x 4 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 4 x s8>) = COPY $v8 + %0:_(<vscale x 4 x s16>) = G_SEXT %1(<vscale x 4 x s8>) + $v8 = COPY %0(<vscale x 4 x s16>) + PseudoRET implicit $v8 +... +--- +name: sext_nxv4i32_nxv4i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv4i32_nxv4i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SEXT [[COPY]](<vscale x 4 x s8>) + ; RV32-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 4 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: sext_nxv4i32_nxv4i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SEXT [[COPY]](<vscale x 4 x s8>) + ; RV64-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 4 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 4 x s8>) = COPY $v8 + %0:_(<vscale x 4 x s32>) = G_SEXT %1(<vscale x 4 x s8>) + $v8m2 = COPY %0(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 +... 
+--- +name: sext_nxv4i64_nxv4i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv4i64_nxv4i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SEXT [[COPY]](<vscale x 4 x s8>) + ; RV32-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 4 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: sext_nxv4i64_nxv4i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SEXT [[COPY]](<vscale x 4 x s8>) + ; RV64-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 4 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 4 x s8>) = COPY $v8 + %0:_(<vscale x 4 x s64>) = G_SEXT %1(<vscale x 4 x s8>) + $v8m4 = COPY %0(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 +... +--- +name: sext_nxv8i16_nxv8i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv8i16_nxv8i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SEXT [[COPY]](<vscale x 8 x s8>) + ; RV32-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 8 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: sext_nxv8i16_nxv8i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SEXT [[COPY]](<vscale x 8 x s8>) + ; RV64-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 8 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 8 x s8>) = COPY $v8 + %0:_(<vscale x 8 x s16>) = G_SEXT %1(<vscale x 8 x s8>) + $v8m2 = COPY %0(<vscale x 8 x s16>) + PseudoRET implicit $v8m2 +... 
+--- +name: sext_nxv8i32_nxv8i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv8i32_nxv8i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SEXT [[COPY]](<vscale x 8 x s8>) + ; RV32-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 8 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: sext_nxv8i32_nxv8i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SEXT [[COPY]](<vscale x 8 x s8>) + ; RV64-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 8 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 8 x s8>) = COPY $v8 + %0:_(<vscale x 8 x s32>) = G_SEXT %1(<vscale x 8 x s8>) + $v8m4 = COPY %0(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 +... +--- +name: sext_nxv8i64_nxv8i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv8i64_nxv8i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SEXT [[COPY]](<vscale x 8 x s8>) + ; RV32-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 8 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: sext_nxv8i64_nxv8i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SEXT [[COPY]](<vscale x 8 x s8>) + ; RV64-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 8 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 8 x s8>) = COPY $v8 + %0:_(<vscale x 8 x s64>) = G_SEXT %1(<vscale x 8 x s8>) + $v8m8 = COPY %0(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 +... 
+--- +name: sext_nxv16i16_nxv16i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv16i16_nxv16i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v8m2 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SEXT [[COPY]](<vscale x 16 x s8>) + ; RV32-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 16 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: sext_nxv16i16_nxv16i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v8m2 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SEXT [[COPY]](<vscale x 16 x s8>) + ; RV64-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 16 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 16 x s8>) = COPY $v8m2 + %0:_(<vscale x 16 x s16>) = G_SEXT %1(<vscale x 16 x s8>) + $v8m4 = COPY %0(<vscale x 16 x s16>) + PseudoRET implicit $v8m4 +... +--- +name: sext_nxv16i32_nxv16i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv16i32_nxv16i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v8m2 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SEXT [[COPY]](<vscale x 16 x s8>) + ; RV32-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 16 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: sext_nxv16i32_nxv16i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v8m2 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SEXT [[COPY]](<vscale x 16 x s8>) + ; RV64-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 16 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 16 x s8>) = COPY $v8m2 + %0:_(<vscale x 16 x s32>) = G_SEXT %1(<vscale x 16 x s8>) + $v8m8 = COPY %0(<vscale x 16 x s32>) + PseudoRET implicit $v8m8 +... 
+--- +name: sext_nxv32i16_nxv32i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv32i16_nxv32i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s8>) = COPY $v8m4 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SEXT [[COPY]](<vscale x 32 x s8>) + ; RV32-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 32 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: sext_nxv32i16_nxv32i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s8>) = COPY $v8m4 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SEXT [[COPY]](<vscale x 32 x s8>) + ; RV64-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 32 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 32 x s8>) = COPY $v8m4 + %0:_(<vscale x 32 x s16>) = G_SEXT %1(<vscale x 32 x s8>) + $v8m8 = COPY %0(<vscale x 32 x s16>) + PseudoRET implicit $v8m8 +... + +# Extend from s16 element vectors +--- +name: sext_nxv1i32_nxv1i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv1i32_nxv1i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SEXT [[COPY]](<vscale x 1 x s16>) + ; RV32-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv1i32_nxv1i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SEXT [[COPY]](<vscale x 1 x s16>) + ; RV64-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s16>) = COPY $v8 + %0:_(<vscale x 1 x s32>) = G_SEXT %1(<vscale x 1 x s16>) + $v8 = COPY %0(<vscale x 1 x s32>) + PseudoRET implicit $v8 +... 
+--- +name: sext_nxv1i64_nxv1i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv1i64_nxv1i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SEXT [[COPY]](<vscale x 1 x s16>) + ; RV32-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv1i64_nxv1i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SEXT [[COPY]](<vscale x 1 x s16>) + ; RV64-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s16>) = COPY $v8 + %0:_(<vscale x 1 x s64>) = G_SEXT %1(<vscale x 1 x s16>) + $v8 = COPY %0(<vscale x 1 x s64>) + PseudoRET implicit $v8 +... +--- +name: sext_nxv2i32_nxv2i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv2i32_nxv2i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SEXT [[COPY]](<vscale x 2 x s16>) + ; RV32-NEXT: $v8 = COPY [[SEXT]](<vscale x 2 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv2i32_nxv2i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SEXT [[COPY]](<vscale x 2 x s16>) + ; RV64-NEXT: $v8 = COPY [[SEXT]](<vscale x 2 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s16>) = COPY $v8 + %0:_(<vscale x 2 x s32>) = G_SEXT %1(<vscale x 2 x s16>) + $v8 = COPY %0(<vscale x 2 x s32>) + PseudoRET implicit $v8 +... 
+--- +name: sext_nxv2i64_nxv2i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv2i64_nxv2i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SEXT [[COPY]](<vscale x 2 x s16>) + ; RV32-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 2 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: sext_nxv2i64_nxv2i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SEXT [[COPY]](<vscale x 2 x s16>) + ; RV64-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 2 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 2 x s16>) = COPY $v8 + %0:_(<vscale x 2 x s64>) = G_SEXT %1(<vscale x 2 x s16>) + $v8m2 = COPY %0(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 +... +--- +name: sext_nxv4i32_nxv4i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv4i32_nxv4i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SEXT [[COPY]](<vscale x 4 x s16>) + ; RV32-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 4 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: sext_nxv4i32_nxv4i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SEXT [[COPY]](<vscale x 4 x s16>) + ; RV64-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 4 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 4 x s16>) = COPY $v8 + %0:_(<vscale x 4 x s32>) = G_SEXT %1(<vscale x 4 x s16>) + $v8m2 = COPY %0(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 +... 
+--- +name: sext_nxv4i64_nxv4i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv4i64_nxv4i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SEXT [[COPY]](<vscale x 4 x s16>) + ; RV32-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 4 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: sext_nxv4i64_nxv4i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SEXT [[COPY]](<vscale x 4 x s16>) + ; RV64-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 4 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 4 x s16>) = COPY $v8 + %0:_(<vscale x 4 x s64>) = G_SEXT %1(<vscale x 4 x s16>) + $v8m4 = COPY %0(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 +... +--- +name: sext_nxv8i32_nxv8i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv8i32_nxv8i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v8m2 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SEXT [[COPY]](<vscale x 8 x s16>) + ; RV32-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 8 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: sext_nxv8i32_nxv8i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v8m2 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SEXT [[COPY]](<vscale x 8 x s16>) + ; RV64-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 8 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 8 x s16>) = COPY $v8m2 + %0:_(<vscale x 8 x s32>) = G_SEXT %1(<vscale x 8 x s16>) + $v8m4 = COPY %0(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 +... 
+--- +name: sext_nxv8i64_nxv8i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv8i64_nxv8i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v8m2 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SEXT [[COPY]](<vscale x 8 x s16>) + ; RV32-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 8 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: sext_nxv8i64_nxv8i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v8m2 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SEXT [[COPY]](<vscale x 8 x s16>) + ; RV64-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 8 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 8 x s16>) = COPY $v8m2 + %0:_(<vscale x 8 x s64>) = G_SEXT %1(<vscale x 8 x s16>) + $v8m8 = COPY %0(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 +... +--- +name: sext_nxv16i32_nxv16i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv16i32_nxv16i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s16>) = COPY $v8m4 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SEXT [[COPY]](<vscale x 16 x s16>) + ; RV32-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 16 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: sext_nxv16i32_nxv16i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s16>) = COPY $v8m4 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SEXT [[COPY]](<vscale x 16 x s16>) + ; RV64-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 16 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 16 x s16>) = COPY $v8m4 + %0:_(<vscale x 16 x s32>) = G_SEXT %1(<vscale x 16 x s16>) + $v8m8 = COPY %0(<vscale x 16 x s32>) + PseudoRET implicit $v8m8 +... 
+ +# Extend from s32 element vectors +--- +name: sext_nxv1i64_nxv1i32 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv1i64_nxv1i32 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s32>) = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SEXT [[COPY]](<vscale x 1 x s32>) + ; RV32-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv1i64_nxv1i32 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s32>) = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SEXT [[COPY]](<vscale x 1 x s32>) + ; RV64-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s32>) = COPY $v8 + %0:_(<vscale x 1 x s64>) = G_SEXT %1(<vscale x 1 x s32>) + $v8 = COPY %0(<vscale x 1 x s64>) + PseudoRET implicit $v8 +... +--- +name: sext_nxv2i64_nxv2i32 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv2i64_nxv2i32 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s32>) = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SEXT [[COPY]](<vscale x 2 x s32>) + ; RV32-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 2 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: sext_nxv2i64_nxv2i32 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s32>) = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SEXT [[COPY]](<vscale x 2 x s32>) + ; RV64-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 2 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 2 x s32>) = COPY $v8 + %0:_(<vscale x 2 x s64>) = G_SEXT %1(<vscale x 2 x s32>) + $v8m2 = COPY %0(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 +... 
+--- +name: sext_nxv4i64_nxv4i32 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv4i64_nxv4i32 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $v8m2 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SEXT [[COPY]](<vscale x 4 x s32>) + ; RV32-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 4 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: sext_nxv4i64_nxv4i32 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $v8m2 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SEXT [[COPY]](<vscale x 4 x s32>) + ; RV64-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 4 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 4 x s32>) = COPY $v8m2 + %0:_(<vscale x 4 x s64>) = G_SEXT %1(<vscale x 4 x s32>) + $v8m4 = COPY %0(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 +... +--- +name: sext_nxv8i64_nxv8i32 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv8i64_nxv8i32 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s32>) = COPY $v8m4 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SEXT [[COPY]](<vscale x 8 x s32>) + ; RV32-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 8 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: sext_nxv8i64_nxv8i32 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s32>) = COPY $v8m4 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SEXT [[COPY]](<vscale x 8 x s32>) + ; RV64-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 8 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 8 x s32>) = COPY $v8m4 + %0:_(<vscale x 8 x s64>) = G_SEXT %1(<vscale x 8 x s32>) + $v8m8 = COPY %0(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-rv32.mir new file mode 100644 index 0000000..109536a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-rv32.mir @@ -0,0 +1,694 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s + +--- +name: splatvector_nxv1i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv1i1_0 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMCLR_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 1 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 0 + %1:_(<vscale x 1 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 1 x s1>) + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv1i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv1i1_1 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 1 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 1 + %1:_(<vscale x 1 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 1 x s1>) + PseudoRET implicit $v0 + +... 
+--- +name: splatvector_nxv1i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv1i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[AND1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[C2]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 1 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 1 x s8>), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 1 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s32) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s32) + %2:_(<vscale x 1 x s1>) = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2(<vscale x 1 x s1>) + PseudoRET implicit $v0 +... +--- +name: splatvector_nxv2i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv2i1_0 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMCLR_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 2 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 0 + %1:_(<vscale x 2 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 2 x s1>) + PseudoRET implicit $v0 + +... 
+--- +name: splatvector_nxv2i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv2i1_1 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 2 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 1 + %1:_(<vscale x 2 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 2 x s1>) + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv2i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv2i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[AND1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C2]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 2 x s8>), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 2 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s32) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s32) + %2:_(<vscale x 2 x s1>) = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2(<vscale x 2 x s1>) + PseudoRET implicit $v0 +... 
+--- +name: splatvector_nxv4i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv4i1_0 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMCLR_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 4 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 0 + %1:_(<vscale x 4 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 4 x s1>) + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv4i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv4i1_1 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 4 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 1 + %1:_(<vscale x 4 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 4 x s1>) + PseudoRET implicit $v0 + +... 
+--- +name: splatvector_nxv4i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv4i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[AND1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C2]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 4 x s8>), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 4 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s32) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s32) + %2:_(<vscale x 4 x s1>) = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2(<vscale x 4 x s1>) + PseudoRET implicit $v0 +... +--- +name: splatvector_nxv8i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv8i1_0 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMCLR_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 8 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 0 + %1:_(<vscale x 8 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 8 x s1>) + PseudoRET implicit $v0 + +... 
+--- +name: splatvector_nxv8i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv8i1_1 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 8 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 1 + %1:_(<vscale x 8 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 8 x s1>) + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv8i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv8i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[AND1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C2]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 8 x s8>), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 8 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s32) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s32) + %2:_(<vscale x 8 x s1>) = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2(<vscale x 8 x s1>) + PseudoRET implicit $v0 +... 
+--- +name: splatvector_nxv16i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv16i1_0 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 16 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 16 x s1>) = G_VMCLR_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 16 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 0 + %1:_(<vscale x 16 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 16 x s1>) + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv16i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv16i1_1 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 16 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 16 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 16 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 1 + %1:_(<vscale x 16 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 16 x s1>) + PseudoRET implicit $v0 + +... 
+--- +name: splatvector_nxv16i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv16i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[AND1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[C2]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 16 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 16 x s8>), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 16 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s32) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s32) + %2:_(<vscale x 16 x s1>) = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2(<vscale x 16 x s1>) + PseudoRET implicit $v0 +... +--- +name: splatvector_nxv32i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv32i1_0 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 32 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 32 x s1>) = G_VMCLR_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 32 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 0 + %1:_(<vscale x 32 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 32 x s1>) + PseudoRET implicit $v0 + +... 
+--- +name: splatvector_nxv32i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv32i1_1 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 32 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 32 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 32 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 1 + %1:_(<vscale x 32 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 32 x s1>) + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv32i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv32i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[AND1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[C2]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 32 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 32 x s8>), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 32 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s32) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s32) + %2:_(<vscale x 32 x s1>) = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2(<vscale x 32 x s1>) + PseudoRET implicit $v0 +... 
+--- +name: splatvector_nxv64i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv64i1_0 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 64 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 64 x s1>) = G_VMCLR_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 64 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 0 + %1:_(<vscale x 64 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 64 x s1>) + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv64i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv64i1_1 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 64 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 64 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 64 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 1 + %1:_(<vscale x 64 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 64 x s1>) + PseudoRET implicit $v0 + +... 
+--- +name: splatvector_nxv64i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv64i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[AND1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[C2]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 64 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 64 x s8>), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 64 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s32) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s32) + %2:_(<vscale x 64 x s1>) = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2(<vscale x 64 x s1>) + PseudoRET implicit $v0 +... + +--- +name: splatvector_nxv1i8 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv1i8 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 1 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s8) = G_CONSTANT i8 0 + %2:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR %1(s8) + $v8 = COPY %2(<vscale x 1 x s8>) + PseudoRET implicit $v8 + +... 
+ +--- +name: splatvector_nxv2i8 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv2i8 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 2 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s8) = G_CONSTANT i8 0 + %2:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR %1(s8) + $v8 = COPY %2(<vscale x 2 x s8>) + PseudoRET implicit $v8 + +... +--- +name: splatvector_nxv4i8 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv4i8 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 4 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s8) = G_CONSTANT i8 0 + %2:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR %1(s8) + $v8 = COPY %2(<vscale x 4 x s8>) + PseudoRET implicit $v8 + +... +--- +name: splatvector_nxv8i8 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv8i8 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 8 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s8) = G_CONSTANT i8 0 + %2:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR %1(s8) + $v8 = COPY %2(<vscale x 8 x s8>) + PseudoRET implicit $v8 + +... 
+--- +name: splatvector_nxv16i8 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv16i8 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]](<vscale x 16 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %1:_(s8) = G_CONSTANT i8 0 + %2:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR %1(s8) + $v8m2 = COPY %2(<vscale x 16 x s8>) + PseudoRET implicit $v8m2 + +... +--- +name: splatvector_nxv1i16 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv1i16 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 1 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s16) = G_CONSTANT i16 0 + %2:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR %1(s16) + $v8 = COPY %2(<vscale x 1 x s16>) + PseudoRET implicit $v8 + +... +--- +name: splatvector_nxv2i16 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv2i16 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 2 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s16) = G_CONSTANT i16 0 + %2:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR %1(s16) + $v8 = COPY %2(<vscale x 2 x s16>) + PseudoRET implicit $v8 + +... 
+--- +name: splatvector_nxv4i16 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv4i16 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 4 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s16) = G_CONSTANT i16 0 + %2:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR %1(s16) + $v8 = COPY %2(<vscale x 4 x s16>) + PseudoRET implicit $v8 + +... +--- +name: splatvector_nxv8i16 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv8i16 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]](<vscale x 8 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %1:_(s16) = G_CONSTANT i16 0 + %2:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR %1(s16) + $v8m2 = COPY %2(<vscale x 8 x s16>) + PseudoRET implicit $v8m2 + +... +--- +name: splatvector_nxv16i16 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv16i16 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]](<vscale x 16 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %1:_(s16) = G_CONSTANT i16 0 + %2:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR %1(s16) + $v8m4 = COPY %2(<vscale x 16 x s16>) + PseudoRET implicit $v8m4 + +... 
+--- +name: splatvector_nxv1i32 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv1i32 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 1 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR %1(s32) + $v8 = COPY %2(<vscale x 1 x s32>) + PseudoRET implicit $v8 + +... +--- +name: splatvector_nxv2i32 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv2i32 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 2 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR %1(s32) + $v8 = COPY %2(<vscale x 2 x s32>) + PseudoRET implicit $v8 + +... +--- +name: splatvector_nxv4i32 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv4i32 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]](<vscale x 4 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR %1(s32) + $v8m2 = COPY %2(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 + +... 
+--- +name: splatvector_nxv8i32 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv8i32 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]](<vscale x 8 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR %1(s32) + $v8m4 = COPY %2(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 + +... +--- +name: splatvector_nxv16i32 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv16i32 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]](<vscale x 16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8m8 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR %1(s32) + $v8m8 = COPY %2(<vscale x 16 x s32>) + PseudoRET implicit $v8m8 + +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-rv64.mir new file mode 100644 index 0000000..7bf5f83 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-rv64.mir @@ -0,0 +1,817 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s + +--- +name: splatvector_nxv1i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv1i1_0 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMCLR_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 1 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 0 + %1:_(<vscale x 1 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 1 x s1>) + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv1i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv1i1_1 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 1 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 1 + %1:_(<vscale x 1 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 1 x s1>) + PseudoRET implicit $v0 + +... 
+--- +name: splatvector_nxv1i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv1i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[AND1]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 1 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 1 x s8>), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 1 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s64) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s64) + %2:_(<vscale x 1 x s1>) = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2(<vscale x 1 x s1>) + PseudoRET implicit $v0 +... +--- +name: splatvector_nxv2i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv2i1_0 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMCLR_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 2 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 0 + %1:_(<vscale x 2 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 2 x s1>) + PseudoRET implicit $v0 + +... 
+--- +name: splatvector_nxv2i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv2i1_1 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 2 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 1 + %1:_(<vscale x 2 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 2 x s1>) + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv2i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv2i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[AND1]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 2 x s8>), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 2 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s64) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s64) + %2:_(<vscale x 2 x s1>) = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2(<vscale x 2 x s1>) + PseudoRET implicit $v0 +... 
+--- +name: splatvector_nxv4i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv4i1_0 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMCLR_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 4 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 0 + %1:_(<vscale x 4 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 4 x s1>) + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv4i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv4i1_1 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 4 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 1 + %1:_(<vscale x 4 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 4 x s1>) + PseudoRET implicit $v0 + +... 
+--- +name: splatvector_nxv4i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv4i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[AND1]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 4 x s8>), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 4 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s64) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s64) + %2:_(<vscale x 4 x s1>) = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2(<vscale x 4 x s1>) + PseudoRET implicit $v0 +... +--- +name: splatvector_nxv8i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv8i1_0 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMCLR_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 8 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 0 + %1:_(<vscale x 8 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 8 x s1>) + PseudoRET implicit $v0 + +... 
+--- +name: splatvector_nxv8i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv8i1_1 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 8 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 1 + %1:_(<vscale x 8 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 8 x s1>) + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv8i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv8i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[AND1]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 8 x s8>), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 8 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s64) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s64) + %2:_(<vscale x 8 x s1>) = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2(<vscale x 8 x s1>) + PseudoRET implicit $v0 +... 
+--- +name: splatvector_nxv16i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv16i1_0 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 16 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 16 x s1>) = G_VMCLR_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 16 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 0 + %1:_(<vscale x 16 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 16 x s1>) + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv16i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv16i1_1 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 16 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 16 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 16 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 1 + %1:_(<vscale x 16 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 16 x s1>) + PseudoRET implicit $v0 + +... 
+--- +name: splatvector_nxv16i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv16i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[AND1]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 16 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 16 x s8>), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 16 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s64) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s64) + %2:_(<vscale x 16 x s1>) = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2(<vscale x 16 x s1>) + PseudoRET implicit $v0 +... +--- +name: splatvector_nxv32i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv32i1_0 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 32 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 32 x s1>) = G_VMCLR_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 32 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 0 + %1:_(<vscale x 32 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 32 x s1>) + PseudoRET implicit $v0 + +... 
+--- +name: splatvector_nxv32i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv32i1_1 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 32 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 32 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 32 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 1 + %1:_(<vscale x 32 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 32 x s1>) + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv32i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv32i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[AND1]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 32 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 32 x s8>), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 32 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s64) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s64) + %2:_(<vscale x 32 x s1>) = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2(<vscale x 32 x s1>) + PseudoRET implicit $v0 +... 
+--- +name: splatvector_nxv64i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv64i1_0 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 64 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 64 x s1>) = G_VMCLR_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 64 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 0 + %1:_(<vscale x 64 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 64 x s1>) + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv64i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv64i1_1 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 64 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 64 x s1>) = G_VMSET_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 64 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 1 + %1:_(<vscale x 64 x s1>) = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1(<vscale x 64 x s1>) + PseudoRET implicit $v0 + +... 
+--- +name: splatvector_nxv64i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv64i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[AND1]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 64 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 64 x s8>), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 64 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s64) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s64) + %2:_(<vscale x 64 x s1>) = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2(<vscale x 64 x s1>) + PseudoRET implicit $v0 +... + +--- +name: splatvector_nxv1i8 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv1i8 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 1 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF + %1:_(s8) = G_CONSTANT i8 0 + %2:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR %1(s8) + $v8 = COPY %2(<vscale x 1 x s8>) + PseudoRET implicit $v8 + +... 
+ +--- +name: splatvector_nxv2i8 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv2i8 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 2 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF + %1:_(s8) = G_CONSTANT i8 0 + %2:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR %1(s8) + $v8 = COPY %2(<vscale x 2 x s8>) + PseudoRET implicit $v8 + +... +--- +name: splatvector_nxv4i8 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv4i8 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 4 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF + %1:_(s8) = G_CONSTANT i8 0 + %2:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR %1(s8) + $v8 = COPY %2(<vscale x 4 x s8>) + PseudoRET implicit $v8 + +... +--- +name: splatvector_nxv8i8 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv8i8 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 8 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF + %1:_(s8) = G_CONSTANT i8 0 + %2:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR %1(s8) + $v8 = COPY %2(<vscale x 8 x s8>) + PseudoRET implicit $v8 + +... 
+--- +name: splatvector_nxv16i8 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv16i8 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]](<vscale x 16 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %0:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF + %1:_(s8) = G_CONSTANT i8 0 + %2:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR %1(s8) + $v8m2 = COPY %2(<vscale x 16 x s8>) + PseudoRET implicit $v8m2 + +... +--- +name: splatvector_nxv1i16 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv1i16 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 1 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF + %1:_(s16) = G_CONSTANT i16 0 + %2:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR %1(s16) + $v8 = COPY %2(<vscale x 1 x s16>) + PseudoRET implicit $v8 + +... +--- +name: splatvector_nxv2i16 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv2i16 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 2 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF + %1:_(s16) = G_CONSTANT i16 0 + %2:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR %1(s16) + $v8 = COPY %2(<vscale x 2 x s16>) + PseudoRET implicit $v8 + +... 
+--- +name: splatvector_nxv4i16 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv4i16 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 4 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF + %1:_(s16) = G_CONSTANT i16 0 + %2:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR %1(s16) + $v8 = COPY %2(<vscale x 4 x s16>) + PseudoRET implicit $v8 + +... +--- +name: splatvector_nxv8i16 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv8i16 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]](<vscale x 8 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %0:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF + %1:_(s16) = G_CONSTANT i16 0 + %2:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR %1(s16) + $v8m2 = COPY %2(<vscale x 8 x s16>) + PseudoRET implicit $v8m2 + +... +--- +name: splatvector_nxv16i16 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv16i16 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]](<vscale x 16 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %0:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF + %1:_(s16) = G_CONSTANT i16 0 + %2:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR %1(s16) + $v8m4 = COPY %2(<vscale x 16 x s16>) + PseudoRET implicit $v8m4 + +... 
+--- +name: splatvector_nxv1i32 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv1i32 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 1 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR %1(s32) + $v8 = COPY %2(<vscale x 1 x s32>) + PseudoRET implicit $v8 + +... +--- +name: splatvector_nxv2i32 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv2i32 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 2 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR %1(s32) + $v8 = COPY %2(<vscale x 2 x s32>) + PseudoRET implicit $v8 + +... +--- +name: splatvector_nxv4i32 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv4i32 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]](<vscale x 4 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %0:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR %1(s32) + $v8m2 = COPY %2(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 + +... 
+--- +name: splatvector_nxv8i32 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv8i32 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]](<vscale x 8 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %0:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR %1(s32) + $v8m4 = COPY %2(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 + +... +--- +name: splatvector_nxv16i32 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv16i32 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]](<vscale x 16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8m8 + %0:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR %1(s32) + $v8m8 = COPY %2(<vscale x 16 x s32>) + PseudoRET implicit $v8m8 + +... +--- +name: splatvector_nxv1i64 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv1i64 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[C]](s64) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 1 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF + %1:_(s64) = G_CONSTANT i64 0 + %2:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR %1(s64) + $v8 = COPY %2(<vscale x 1 x s64>) + PseudoRET implicit $v8 + +... 
+--- +name: splatvector_nxv2i64 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv2i64 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[C]](s64) + ; CHECK-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]](<vscale x 2 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %0:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF + %1:_(s64) = G_CONSTANT i64 0 + %2:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR %1(s64) + $v8m2 = COPY %2(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 + +... +--- +name: splatvector_nxv4i64 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv4i64 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[C]](s64) + ; CHECK-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]](<vscale x 4 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %0:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF + %1:_(s64) = G_CONSTANT i64 0 + %2:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR %1(s64) + $v8m4 = COPY %2(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 + +... +--- +name: splatvector_nxv8i64 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv8i64 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[C]](s64) + ; CHECK-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]](<vscale x 8 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8m8 + %0:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF + %1:_(s64) = G_CONSTANT i64 0 + %2:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR %1(s64) + $v8m8 = COPY %2(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 + +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-s64-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-s64-rv32.mir new file mode 100644 index 0000000..806c9b9 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-s64-rv32.mir @@ -0,0 +1,116 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck --check-prefix=HasF64 %s +# RUN: llc -mtriple=riscv32 -mattr=+Zve64x -run-pass=legalizer %s -o - | FileCheck --check-prefix=NoF64 %s + +--- +name: splatvector_nxv1i64 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; HasF64-LABEL: name: splatvector_nxv1i64 + ; HasF64: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; HasF64-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; HasF64-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[DEF]](s32), [[DEF1]](s32) + ; HasF64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[MV]](s64) + ; HasF64-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 1 x s64>) + ; HasF64-NEXT: PseudoRET implicit $v8 + ; + ; NoF64-LABEL: name: splatvector_nxv1i64 + ; NoF64: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; NoF64-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; NoF64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0 + ; NoF64-NEXT: [[DEF2:%[0-9]+]]:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF + ; NoF64-NEXT: [[SPLAT_VECTOR_SPLIT_I64_VL:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR_SPLIT_I64_VL [[DEF2]], [[DEF]](s32), [[DEF1]], $x0 + ; NoF64-NEXT: $v8 = COPY [[SPLAT_VECTOR_SPLIT_I64_VL]](<vscale x 1 x s64>) + ; NoF64-NEXT: PseudoRET implicit $v8 + %0:_(s64) = G_IMPLICIT_DEF + %1:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR %0(s64) + $v8 = COPY %1(<vscale x 1 x s64>) + PseudoRET implicit $v8 + +... 
+--- +name: splatvector_nxv2i64 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; HasF64-LABEL: name: splatvector_nxv2i64 + ; HasF64: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; HasF64-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; HasF64-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[DEF]](s32), [[DEF1]](s32) + ; HasF64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[MV]](s64) + ; HasF64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]](<vscale x 2 x s64>) + ; HasF64-NEXT: PseudoRET implicit $v8m2 + ; + ; NoF64-LABEL: name: splatvector_nxv2i64 + ; NoF64: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; NoF64-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; NoF64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0 + ; NoF64-NEXT: [[DEF2:%[0-9]+]]:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF + ; NoF64-NEXT: [[SPLAT_VECTOR_SPLIT_I64_VL:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR_SPLIT_I64_VL [[DEF2]], [[DEF]](s32), [[DEF1]], $x0 + ; NoF64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR_SPLIT_I64_VL]](<vscale x 2 x s64>) + ; NoF64-NEXT: PseudoRET implicit $v8m2 + %0:_(s64) = G_IMPLICIT_DEF + %1:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR %0(s64) + $v8m2 = COPY %1(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 + +... 
+--- +name: splatvector_nxv4i64 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; HasF64-LABEL: name: splatvector_nxv4i64 + ; HasF64: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; HasF64-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; HasF64-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[DEF]](s32), [[DEF1]](s32) + ; HasF64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[MV]](s64) + ; HasF64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]](<vscale x 4 x s64>) + ; HasF64-NEXT: PseudoRET implicit $v8m4 + ; + ; NoF64-LABEL: name: splatvector_nxv4i64 + ; NoF64: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; NoF64-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; NoF64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0 + ; NoF64-NEXT: [[DEF2:%[0-9]+]]:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF + ; NoF64-NEXT: [[SPLAT_VECTOR_SPLIT_I64_VL:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR_SPLIT_I64_VL [[DEF2]], [[DEF]](s32), [[DEF1]], $x0 + ; NoF64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR_SPLIT_I64_VL]](<vscale x 4 x s64>) + ; NoF64-NEXT: PseudoRET implicit $v8m4 + %0:_(s64) = G_IMPLICIT_DEF + %1:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR %0(s64) + $v8m4 = COPY %1(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 + +... 
+--- +name: splatvector_nxv8i64 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; HasF64-LABEL: name: splatvector_nxv8i64 + ; HasF64: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; HasF64-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; HasF64-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[DEF]](s32), [[DEF1]](s32) + ; HasF64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[MV]](s64) + ; HasF64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]](<vscale x 8 x s64>) + ; HasF64-NEXT: PseudoRET implicit $v8m8 + ; + ; NoF64-LABEL: name: splatvector_nxv8i64 + ; NoF64: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; NoF64-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; NoF64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0 + ; NoF64-NEXT: [[DEF2:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF + ; NoF64-NEXT: [[SPLAT_VECTOR_SPLIT_I64_VL:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR_SPLIT_I64_VL [[DEF2]], [[DEF]](s32), [[DEF1]], $x0 + ; NoF64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR_SPLIT_I64_VL]](<vscale x 8 x s64>) + ; NoF64-NEXT: PseudoRET implicit $v8m8 + %0:_(s64) = G_IMPLICIT_DEF + %1:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR %0(s64) + $v8m8 = COPY %1(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 + +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-xor.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-xor.mir index 4de02b1..8a34521 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-xor.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-xor.mir @@ -9,8 +9,8 @@ body: | ; CHECK-LABEL: name: test_nxv1i8 ; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v9 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8 = COPY [[OR]](<vscale x 1 x s8>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8 = COPY [[XOR]](<vscale x 1 x s8>) ; CHECK-NEXT: PseudoRET implicit $v8 %0:_(<vscale x 1 x s8>) = COPY $v8 %1:_(<vscale x 1 x s8>) = COPY $v9 @@ -27,8 +27,8 @@ body: | ; CHECK-LABEL: name: test_nxv2i8 ; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v9 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8 = COPY [[OR]](<vscale x 2 x s8>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8 = COPY [[XOR]](<vscale x 2 x s8>) ; CHECK-NEXT: PseudoRET implicit $v8 %0:_(<vscale x 2 x s8>) = COPY $v8 %1:_(<vscale x 2 x s8>) = COPY $v9 @@ -45,8 +45,8 @@ body: | ; CHECK-LABEL: name: test_nxv4i8 ; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v9 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8 = COPY [[OR]](<vscale x 4 x s8>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8 = COPY [[XOR]](<vscale x 4 x s8>) ; CHECK-NEXT: PseudoRET implicit $v8 %0:_(<vscale x 4 x s8>) = COPY $v8 %1:_(<vscale x 4 x s8>) = COPY $v9 @@ -63,8 
+63,8 @@ body: | ; CHECK-LABEL: name: test_nxv8i8 ; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v9 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8 = COPY [[OR]](<vscale x 8 x s8>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8 = COPY [[XOR]](<vscale x 8 x s8>) ; CHECK-NEXT: PseudoRET implicit $v8 %0:_(<vscale x 8 x s8>) = COPY $v8 %1:_(<vscale x 8 x s8>) = COPY $v9 @@ -81,8 +81,8 @@ body: | ; CHECK-LABEL: name: test_nxv16i8 ; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v8m2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v10m2 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8m2 = COPY [[OR]](<vscale x 16 x s8>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8m2 = COPY [[XOR]](<vscale x 16 x s8>) ; CHECK-NEXT: PseudoRET implicit $v8m2 %0:_(<vscale x 16 x s8>) = COPY $v8m2 %1:_(<vscale x 16 x s8>) = COPY $v10m2 @@ -99,8 +99,8 @@ body: | ; CHECK-LABEL: name: test_nxv32i8 ; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 32 x s8>) = COPY $v8m4 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 32 x s8>) = COPY $v12m4 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 32 x s8>) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8m4 = COPY [[OR]](<vscale x 32 x s8>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 32 x s8>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8m4 = COPY [[XOR]](<vscale x 32 x s8>) ; CHECK-NEXT: PseudoRET implicit $v8m4 %0:_(<vscale x 32 x s8>) = COPY $v8m4 %1:_(<vscale x 32 x s8>) = COPY $v12m4 @@ -117,8 +117,8 @@ body: | ; CHECK-LABEL: name: test_nxv64i8 ; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 64 x s8>) = COPY $v8m8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 64 x s8>) = COPY $v16m8 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: 
$v8m8 = COPY [[OR]](<vscale x 64 x s8>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8m8 = COPY [[XOR]](<vscale x 64 x s8>) ; CHECK-NEXT: PseudoRET implicit $v8m8 %0:_(<vscale x 64 x s8>) = COPY $v8m8 %1:_(<vscale x 64 x s8>) = COPY $v16m8 @@ -135,8 +135,8 @@ body: | ; CHECK-LABEL: name: test_nxv1i16 ; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v9 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 1 x s16>) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8 = COPY [[OR]](<vscale x 1 x s16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 1 x s16>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8 = COPY [[XOR]](<vscale x 1 x s16>) ; CHECK-NEXT: PseudoRET implicit $v8 %0:_(<vscale x 1 x s16>) = COPY $v8 %1:_(<vscale x 1 x s16>) = COPY $v9 @@ -153,8 +153,8 @@ body: | ; CHECK-LABEL: name: test_nxv2i16 ; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v9 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 2 x s16>) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8 = COPY [[OR]](<vscale x 2 x s16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 2 x s16>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8 = COPY [[XOR]](<vscale x 2 x s16>) ; CHECK-NEXT: PseudoRET implicit $v8 %0:_(<vscale x 2 x s16>) = COPY $v8 %1:_(<vscale x 2 x s16>) = COPY $v9 @@ -171,8 +171,8 @@ body: | ; CHECK-LABEL: name: test_nxv4i16 ; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v9 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 4 x s16>) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8 = COPY [[OR]](<vscale x 4 x s16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 4 x s16>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8 = COPY [[XOR]](<vscale x 4 x s16>) ; CHECK-NEXT: PseudoRET implicit $v8 %0:_(<vscale x 4 x s16>) = COPY $v8 %1:_(<vscale x 4 x s16>) = COPY $v9 @@ 
-189,8 +189,8 @@ body: | ; CHECK-LABEL: name: test_nxv8i16 ; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v8m2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v10m2 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 8 x s16>) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8m2 = COPY [[OR]](<vscale x 8 x s16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 8 x s16>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8m2 = COPY [[XOR]](<vscale x 8 x s16>) ; CHECK-NEXT: PseudoRET implicit $v8m2 %0:_(<vscale x 8 x s16>) = COPY $v8m2 %1:_(<vscale x 8 x s16>) = COPY $v10m2 @@ -207,8 +207,8 @@ body: | ; CHECK-LABEL: name: test_nxv16i16 ; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 16 x s16>) = COPY $v8m4 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 16 x s16>) = COPY $v12m4 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 16 x s16>) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8m4 = COPY [[OR]](<vscale x 16 x s16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 16 x s16>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8m4 = COPY [[XOR]](<vscale x 16 x s16>) ; CHECK-NEXT: PseudoRET implicit $v8m4 %0:_(<vscale x 16 x s16>) = COPY $v8m4 %1:_(<vscale x 16 x s16>) = COPY $v12m4 @@ -225,8 +225,8 @@ body: | ; CHECK-LABEL: name: test_nxv32i16 ; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 32 x s16>) = COPY $v8m8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 32 x s16>) = COPY $v16m8 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 32 x s16>) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8m8 = COPY [[OR]](<vscale x 32 x s16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 32 x s16>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8m8 = COPY [[XOR]](<vscale x 32 x s16>) ; CHECK-NEXT: PseudoRET implicit $v8m8 %0:_(<vscale x 32 x s16>) = COPY $v8m8 %1:_(<vscale x 32 x s16>) = COPY $v16m8 @@ -243,8 +243,8 @@ body: | ; CHECK-LABEL: name: test_nxv1i32 ; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 1 x s32>) = COPY $v8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 1 x s32>) = COPY $v9 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 1 x 
s32>) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8 = COPY [[OR]](<vscale x 1 x s32>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 1 x s32>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8 = COPY [[XOR]](<vscale x 1 x s32>) ; CHECK-NEXT: PseudoRET implicit $v8 %0:_(<vscale x 1 x s32>) = COPY $v8 %1:_(<vscale x 1 x s32>) = COPY $v9 @@ -261,8 +261,8 @@ body: | ; CHECK-LABEL: name: test_nxv2i32 ; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 2 x s32>) = COPY $v8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s32>) = COPY $v9 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8 = COPY [[OR]](<vscale x 2 x s32>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8 = COPY [[XOR]](<vscale x 2 x s32>) ; CHECK-NEXT: PseudoRET implicit $v8 %0:_(<vscale x 2 x s32>) = COPY $v8 %1:_(<vscale x 2 x s32>) = COPY $v9 @@ -279,8 +279,8 @@ body: | ; CHECK-LABEL: name: test_nxv4i32 ; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $v8m2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $v10m2 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8m2 = COPY [[OR]](<vscale x 4 x s32>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8m2 = COPY [[XOR]](<vscale x 4 x s32>) ; CHECK-NEXT: PseudoRET implicit $v8m2 %0:_(<vscale x 4 x s32>) = COPY $v8m2 %1:_(<vscale x 4 x s32>) = COPY $v10m2 @@ -297,8 +297,8 @@ body: | ; CHECK-LABEL: name: test_nxv8i32 ; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 8 x s32>) = COPY $v8m4 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 8 x s32>) = COPY $v12m4 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8m4 = COPY [[OR]](<vscale x 8 x s32>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8m4 = COPY [[XOR]](<vscale x 8 x s32>) ; CHECK-NEXT: PseudoRET implicit $v8m4 
%0:_(<vscale x 8 x s32>) = COPY $v8m4 %1:_(<vscale x 8 x s32>) = COPY $v12m4 @@ -315,8 +315,8 @@ body: | ; CHECK-LABEL: name: test_nxv16i32 ; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 16 x s32>) = COPY $v8m8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 16 x s32>) = COPY $v16m8 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 16 x s32>) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8m8 = COPY [[OR]](<vscale x 16 x s32>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 16 x s32>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8m8 = COPY [[XOR]](<vscale x 16 x s32>) ; CHECK-NEXT: PseudoRET implicit $v8m8 %0:_(<vscale x 16 x s32>) = COPY $v8m8 %1:_(<vscale x 16 x s32>) = COPY $v16m8 @@ -333,8 +333,8 @@ body: | ; CHECK-LABEL: name: test_nxv1i64 ; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 1 x s64>) = COPY $v8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 1 x s64>) = COPY $v9 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 1 x s64>) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8 = COPY [[OR]](<vscale x 1 x s64>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 1 x s64>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8 = COPY [[XOR]](<vscale x 1 x s64>) ; CHECK-NEXT: PseudoRET implicit $v8 %0:_(<vscale x 1 x s64>) = COPY $v8 %1:_(<vscale x 1 x s64>) = COPY $v9 @@ -351,8 +351,8 @@ body: | ; CHECK-LABEL: name: test_nxv2i64 ; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $v8m2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $v10m2 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 2 x s64>) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8m2 = COPY [[OR]](<vscale x 2 x s64>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 2 x s64>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8m2 = COPY [[XOR]](<vscale x 2 x s64>) ; CHECK-NEXT: PseudoRET implicit $v8m2 %0:_(<vscale x 2 x s64>) = COPY $v8m2 %1:_(<vscale x 2 x s64>) = COPY $v10m2 @@ -369,8 +369,8 @@ body: | ; CHECK-LABEL: name: test_nxv4i64 ; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 4 x s64>) = COPY $v8m4 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s64>) = 
COPY $v12m4 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 4 x s64>) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8m4 = COPY [[OR]](<vscale x 4 x s64>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 4 x s64>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8m4 = COPY [[XOR]](<vscale x 4 x s64>) ; CHECK-NEXT: PseudoRET implicit $v8m4 %0:_(<vscale x 4 x s64>) = COPY $v8m4 %1:_(<vscale x 4 x s64>) = COPY $v12m4 @@ -387,8 +387,8 @@ body: | ; CHECK-LABEL: name: test_nxv8i64 ; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 8 x s64>) = COPY $v8m8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 8 x s64>) = COPY $v16m8 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8m8 = COPY [[OR]](<vscale x 8 x s64>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8m8 = COPY [[XOR]](<vscale x 8 x s64>) ; CHECK-NEXT: PseudoRET implicit $v8m8 %0:_(<vscale x 8 x s64>) = COPY $v8m8 %1:_(<vscale x 8 x s64>) = COPY $v16m8 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-zext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-zext.mir new file mode 100644 index 0000000..fe4ddfa --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-zext.mir @@ -0,0 +1,1589 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck --check-prefix=RV32 %s +# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer %s -o - | FileCheck --check-prefix=RV64 %s + +# Extend from s1 element vectors +--- +name: zext_nxv1i8_nxv1i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv1i8_nxv1i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR 
[[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s8>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv1i8_nxv1i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s8>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s1>) = COPY $v8 + %0:_(<vscale x 1 x s8>) = G_ZEXT %1(<vscale x 1 x s1>) + $v8 = COPY %0(<vscale x 1 x s8>) + PseudoRET implicit $v8 +... 
+--- +name: zext_nxv1i16_nxv1i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv1i16_nxv1i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv1i16_nxv1i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s1>) = COPY $v8 + %0:_(<vscale x 1 x s16>) = G_ZEXT %1(<vscale x 1 x s1>) + $v8 = COPY %0(<vscale x 1 x s16>) + PseudoRET implicit $v8 +... 
+--- +name: zext_nxv1i32_nxv1i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv1i32_nxv1i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv1i32_nxv1i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s1>) = COPY $v8 + %0:_(<vscale x 1 x s32>) = G_ZEXT %1(<vscale x 1 x s1>) + $v8 = COPY %0(<vscale x 1 x s32>) + PseudoRET implicit $v8 +... 
+--- +name: zext_nxv1i64_nxv1i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv1i64_nxv1i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[MV]](s64) + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[MV1]](s64) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv1i64_nxv1i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[C]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[C1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s1>) = COPY $v8 + %0:_(<vscale x 1 x s64>) = G_ZEXT %1(<vscale x 1 x s1>) + $v8 = COPY %0(<vscale x 1 x s64>) + PseudoRET implicit $v8 +... 
+--- +name: zext_nxv2i8_nxv2i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv2i8_nxv2i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s8>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv2i8_nxv2i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s8>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s1>) = COPY $v8 + %0:_(<vscale x 2 x s8>) = G_ZEXT %1(<vscale x 2 x s1>) + $v8 = COPY %0(<vscale x 2 x s8>) + PseudoRET implicit $v8 +... 
+--- +name: zext_nxv2i16_nxv2i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv2i16_nxv2i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv2i16_nxv2i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s1>) = COPY $v8 + %0:_(<vscale x 2 x s16>) = G_ZEXT %1(<vscale x 2 x s1>) + $v8 = COPY %0(<vscale x 2 x s16>) + PseudoRET implicit $v8 +... 
+--- +name: zext_nxv2i32_nxv2i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv2i32_nxv2i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv2i32_nxv2i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s1>) = COPY $v8 + %0:_(<vscale x 2 x s32>) = G_ZEXT %1(<vscale x 2 x s1>) + $v8 = COPY %0(<vscale x 2 x s32>) + PseudoRET implicit $v8 +... 
+--- +name: zext_nxv2i64_nxv2i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv2i64_nxv2i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[MV]](s64) + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[MV1]](s64) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 2 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: zext_nxv2i64_nxv2i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[C]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[C1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 2 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 2 x s1>) = COPY $v8 + %0:_(<vscale x 2 x s64>) = G_ZEXT %1(<vscale x 2 x s1>) + $v8m2 = COPY %0(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 +... 
+--- +name: zext_nxv4i8_nxv4i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv4i8_nxv4i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 4 x s8>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv4i8_nxv4i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 4 x s8>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 4 x s1>) = COPY $v8 + %0:_(<vscale x 4 x s8>) = G_ZEXT %1(<vscale x 4 x s1>) + $v8 = COPY %0(<vscale x 4 x s8>) + PseudoRET implicit $v8 +... 
+--- +name: zext_nxv4i16_nxv4i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv4i16_nxv4i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 4 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv4i16_nxv4i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 4 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 4 x s1>) = COPY $v8 + %0:_(<vscale x 4 x s16>) = G_ZEXT %1(<vscale x 4 x s1>) + $v8 = COPY %0(<vscale x 4 x s16>) + PseudoRET implicit $v8 +... 
+--- +name: zext_nxv4i32_nxv4i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv4i32_nxv4i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 4 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: zext_nxv4i32_nxv4i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 4 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 4 x s1>) = COPY $v8 + %0:_(<vscale x 4 x s32>) = G_ZEXT %1(<vscale x 4 x s1>) + $v8m2 = COPY %0(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 +... 
+--- +name: zext_nxv4i64_nxv4i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv4i64_nxv4i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[MV]](s64) + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[MV1]](s64) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 4 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: zext_nxv4i64_nxv4i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[C]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[C1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 4 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 4 x s1>) = COPY $v8 + %0:_(<vscale x 4 x s64>) = G_ZEXT %1(<vscale x 4 x s1>) + $v8m4 = COPY %0(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 +... 
+--- +name: zext_nxv8i8_nxv8i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv8i8_nxv8i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 8 x s8>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv8i8_nxv8i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 8 x s8>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 8 x s1>) = COPY $v8 + %0:_(<vscale x 8 x s8>) = G_ZEXT %1(<vscale x 8 x s1>) + $v8 = COPY %0(<vscale x 8 x s8>) + PseudoRET implicit $v8 +... 
+--- +name: zext_nxv8i16_nxv8i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv8i16_nxv8i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 8 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: zext_nxv8i16_nxv8i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 8 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 8 x s1>) = COPY $v8 + %0:_(<vscale x 8 x s16>) = G_ZEXT %1(<vscale x 8 x s1>) + $v8m2 = COPY %0(<vscale x 8 x s16>) + PseudoRET implicit $v8m2 +... 
+--- +name: zext_nxv8i32_nxv8i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv8i32_nxv8i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 8 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: zext_nxv8i32_nxv8i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 8 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 8 x s1>) = COPY $v8 + %0:_(<vscale x 8 x s32>) = G_ZEXT %1(<vscale x 8 x s1>) + $v8m4 = COPY %0(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 +... 
+--- +name: zext_nxv8i64_nxv8i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv8i64_nxv8i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[MV]](s64) + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[MV1]](s64) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 8 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: zext_nxv8i64_nxv8i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[C]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[C1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 8 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 8 x s1>) = COPY $v8 + %0:_(<vscale x 8 x s64>) = G_ZEXT %1(<vscale x 8 x s1>) + $v8m8 = COPY %0(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 +... 
+--- +name: zext_nxv16i8_nxv16i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv16i8_nxv16i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 16 x s8>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: zext_nxv16i8_nxv16i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 16 x s8>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 16 x s1>) = COPY $v8 + %0:_(<vscale x 16 x s8>) = G_ZEXT %1(<vscale x 16 x s1>) + $v8m2 = COPY %0(<vscale x 16 x s8>) + PseudoRET implicit $v8m2 +... 
+--- +name: zext_nxv16i16_nxv16i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv16i16_nxv16i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 16 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: zext_nxv16i16_nxv16i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 16 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 16 x s1>) = COPY $v8 + %0:_(<vscale x 16 x s16>) = G_ZEXT %1(<vscale x 16 x s1>) + $v8m4 = COPY %0(<vscale x 16 x s16>) + PseudoRET implicit $v8m4 +... 
+--- +name: zext_nxv16i32_nxv16i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv16i32_nxv16i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 16 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: zext_nxv16i32_nxv16i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 16 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 16 x s1>) = COPY $v8 + %0:_(<vscale x 16 x s32>) = G_ZEXT %1(<vscale x 16 x s1>) + $v8m8 = COPY %0(<vscale x 16 x s32>) + PseudoRET implicit $v8m8 +... 
+--- +name: zext_nxv32i8_nxv32i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv32i8_nxv32i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s1>) = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SELECT [[COPY]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 32 x s8>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: zext_nxv32i8_nxv32i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s1>) = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SELECT [[COPY]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 32 x s8>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 32 x s1>) = COPY $v8 + %0:_(<vscale x 32 x s8>) = G_ZEXT %1(<vscale x 32 x s1>) + $v8m4 = COPY %0(<vscale x 32 x s8>) + PseudoRET implicit $v8m4 +... 
+--- +name: zext_nxv32i16_nxv32i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv32i16_nxv32i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s1>) = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SELECT [[COPY]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 32 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: zext_nxv32i16_nxv32i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s1>) = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SELECT [[COPY]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 32 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 32 x s1>) = COPY $v8 + %0:_(<vscale x 32 x s16>) = G_ZEXT %1(<vscale x 32 x s1>) + $v8m8 = COPY %0(<vscale x 32 x s16>) + PseudoRET implicit $v8m8 +... 
+--- +name: zext_nxv64i8_nxv64i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv64i8_nxv64i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 64 x s1>) = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SELECT [[COPY]](<vscale x 64 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 64 x s8>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: zext_nxv64i8_nxv64i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 64 x s1>) = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SELECT [[COPY]](<vscale x 64 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 64 x s8>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 64 x s1>) = COPY $v8 + %0:_(<vscale x 64 x s8>) = G_ZEXT %1(<vscale x 64 x s1>) + $v8m8 = COPY %0(<vscale x 64 x s8>) + PseudoRET implicit $v8m8 +... 
+ +# Extend from s8 element vectors +--- +name: zext_nxv1i16_nxv1i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv1i16_nxv1i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 1 x s16>) = G_ZEXT [[COPY]](<vscale x 1 x s8>) + ; RV32-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv1i16_nxv1i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 1 x s16>) = G_ZEXT [[COPY]](<vscale x 1 x s8>) + ; RV64-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s8>) = COPY $v8 + %0:_(<vscale x 1 x s16>) = G_ZEXT %1(<vscale x 1 x s8>) + $v8 = COPY %0(<vscale x 1 x s16>) + PseudoRET implicit $v8 +... +--- +name: zext_nxv1i32_nxv1i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv1i32_nxv1i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_ZEXT [[COPY]](<vscale x 1 x s8>) + ; RV32-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv1i32_nxv1i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_ZEXT [[COPY]](<vscale x 1 x s8>) + ; RV64-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s8>) = COPY $v8 + %0:_(<vscale x 1 x s32>) = G_ZEXT %1(<vscale x 1 x s8>) + $v8 = COPY %0(<vscale x 1 x s32>) + PseudoRET implicit $v8 +... 
+--- +name: zext_nxv1i64_nxv1i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv1i64_nxv1i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s8>) + ; RV32-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv1i64_nxv1i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s8>) + ; RV64-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s8>) = COPY $v8 + %0:_(<vscale x 1 x s64>) = G_ZEXT %1(<vscale x 1 x s8>) + $v8 = COPY %0(<vscale x 1 x s64>) + PseudoRET implicit $v8 +... +--- +name: zext_nxv2i16_nxv2i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv2i16_nxv2i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 2 x s16>) = G_ZEXT [[COPY]](<vscale x 2 x s8>) + ; RV32-NEXT: $v8 = COPY [[ZEXT]](<vscale x 2 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv2i16_nxv2i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 2 x s16>) = G_ZEXT [[COPY]](<vscale x 2 x s8>) + ; RV64-NEXT: $v8 = COPY [[ZEXT]](<vscale x 2 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s8>) = COPY $v8 + %0:_(<vscale x 2 x s16>) = G_ZEXT %1(<vscale x 2 x s8>) + $v8 = COPY %0(<vscale x 2 x s16>) + PseudoRET implicit $v8 +... 
+--- +name: zext_nxv2i32_nxv2i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv2i32_nxv2i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_ZEXT [[COPY]](<vscale x 2 x s8>) + ; RV32-NEXT: $v8 = COPY [[ZEXT]](<vscale x 2 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv2i32_nxv2i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_ZEXT [[COPY]](<vscale x 2 x s8>) + ; RV64-NEXT: $v8 = COPY [[ZEXT]](<vscale x 2 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s8>) = COPY $v8 + %0:_(<vscale x 2 x s32>) = G_ZEXT %1(<vscale x 2 x s8>) + $v8 = COPY %0(<vscale x 2 x s32>) + PseudoRET implicit $v8 +... +--- +name: zext_nxv2i64_nxv2i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv2i64_nxv2i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s8>) + ; RV32-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: zext_nxv2i64_nxv2i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s8>) + ; RV64-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 2 x s8>) = COPY $v8 + %0:_(<vscale x 2 x s64>) = G_ZEXT %1(<vscale x 2 x s8>) + $v8m2 = COPY %0(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 +... 
+--- +name: zext_nxv4i16_nxv4i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv4i16_nxv4i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 4 x s16>) = G_ZEXT [[COPY]](<vscale x 4 x s8>) + ; RV32-NEXT: $v8 = COPY [[ZEXT]](<vscale x 4 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv4i16_nxv4i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 4 x s16>) = G_ZEXT [[COPY]](<vscale x 4 x s8>) + ; RV64-NEXT: $v8 = COPY [[ZEXT]](<vscale x 4 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 4 x s8>) = COPY $v8 + %0:_(<vscale x 4 x s16>) = G_ZEXT %1(<vscale x 4 x s8>) + $v8 = COPY %0(<vscale x 4 x s16>) + PseudoRET implicit $v8 +... +--- +name: zext_nxv4i32_nxv4i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv4i32_nxv4i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_ZEXT [[COPY]](<vscale x 4 x s8>) + ; RV32-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 4 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: zext_nxv4i32_nxv4i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_ZEXT [[COPY]](<vscale x 4 x s8>) + ; RV64-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 4 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 4 x s8>) = COPY $v8 + %0:_(<vscale x 4 x s32>) = G_ZEXT %1(<vscale x 4 x s8>) + $v8m2 = COPY %0(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 +... 
+--- +name: zext_nxv4i64_nxv4i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv4i64_nxv4i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_ZEXT [[COPY]](<vscale x 4 x s8>) + ; RV32-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 4 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: zext_nxv4i64_nxv4i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_ZEXT [[COPY]](<vscale x 4 x s8>) + ; RV64-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 4 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 4 x s8>) = COPY $v8 + %0:_(<vscale x 4 x s64>) = G_ZEXT %1(<vscale x 4 x s8>) + $v8m4 = COPY %0(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 +... +--- +name: zext_nxv8i16_nxv8i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv8i16_nxv8i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 8 x s16>) = G_ZEXT [[COPY]](<vscale x 8 x s8>) + ; RV32-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 8 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: zext_nxv8i16_nxv8i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 8 x s16>) = G_ZEXT [[COPY]](<vscale x 8 x s8>) + ; RV64-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 8 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 8 x s8>) = COPY $v8 + %0:_(<vscale x 8 x s16>) = G_ZEXT %1(<vscale x 8 x s8>) + $v8m2 = COPY %0(<vscale x 8 x s16>) + PseudoRET implicit $v8m2 +... 
+--- +name: zext_nxv8i32_nxv8i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv8i32_nxv8i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_ZEXT [[COPY]](<vscale x 8 x s8>) + ; RV32-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 8 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: zext_nxv8i32_nxv8i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_ZEXT [[COPY]](<vscale x 8 x s8>) + ; RV64-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 8 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 8 x s8>) = COPY $v8 + %0:_(<vscale x 8 x s32>) = G_ZEXT %1(<vscale x 8 x s8>) + $v8m4 = COPY %0(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 +... +--- +name: zext_nxv8i64_nxv8i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv8i64_nxv8i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_ZEXT [[COPY]](<vscale x 8 x s8>) + ; RV32-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 8 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: zext_nxv8i64_nxv8i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_ZEXT [[COPY]](<vscale x 8 x s8>) + ; RV64-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 8 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 8 x s8>) = COPY $v8 + %0:_(<vscale x 8 x s64>) = G_ZEXT %1(<vscale x 8 x s8>) + $v8m8 = COPY %0(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 +... 
+--- +name: zext_nxv16i16_nxv16i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv16i16_nxv16i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v8m2 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 16 x s16>) = G_ZEXT [[COPY]](<vscale x 16 x s8>) + ; RV32-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 16 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: zext_nxv16i16_nxv16i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v8m2 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 16 x s16>) = G_ZEXT [[COPY]](<vscale x 16 x s8>) + ; RV64-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 16 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 16 x s8>) = COPY $v8m2 + %0:_(<vscale x 16 x s16>) = G_ZEXT %1(<vscale x 16 x s8>) + $v8m4 = COPY %0(<vscale x 16 x s16>) + PseudoRET implicit $v8m4 +... +--- +name: zext_nxv16i32_nxv16i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv16i32_nxv16i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v8m2 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_ZEXT [[COPY]](<vscale x 16 x s8>) + ; RV32-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 16 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: zext_nxv16i32_nxv16i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v8m2 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_ZEXT [[COPY]](<vscale x 16 x s8>) + ; RV64-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 16 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 16 x s8>) = COPY $v8m2 + %0:_(<vscale x 16 x s32>) = G_ZEXT %1(<vscale x 16 x s8>) + $v8m8 = COPY %0(<vscale x 16 x s32>) + PseudoRET implicit $v8m8 +... 
+--- +name: zext_nxv32i16_nxv32i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv32i16_nxv32i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s8>) = COPY $v8m4 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 32 x s16>) = G_ZEXT [[COPY]](<vscale x 32 x s8>) + ; RV32-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 32 x s16>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: zext_nxv32i16_nxv32i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s8>) = COPY $v8m4 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 32 x s16>) = G_ZEXT [[COPY]](<vscale x 32 x s8>) + ; RV64-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 32 x s16>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 32 x s8>) = COPY $v8m4 + %0:_(<vscale x 32 x s16>) = G_ZEXT %1(<vscale x 32 x s8>) + $v8m8 = COPY %0(<vscale x 32 x s16>) + PseudoRET implicit $v8m8 +... + +# Extend from s16 element vectors +--- +name: zext_nxv1i32_nxv1i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv1i32_nxv1i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_ZEXT [[COPY]](<vscale x 1 x s16>) + ; RV32-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv1i32_nxv1i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_ZEXT [[COPY]](<vscale x 1 x s16>) + ; RV64-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s16>) = COPY $v8 + %0:_(<vscale x 1 x s32>) = G_ZEXT %1(<vscale x 1 x s16>) + $v8 = COPY %0(<vscale x 1 x s32>) + PseudoRET implicit $v8 +... 
+--- +name: zext_nxv1i64_nxv1i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv1i64_nxv1i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s16>) + ; RV32-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv1i64_nxv1i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s16>) + ; RV64-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s16>) = COPY $v8 + %0:_(<vscale x 1 x s64>) = G_ZEXT %1(<vscale x 1 x s16>) + $v8 = COPY %0(<vscale x 1 x s64>) + PseudoRET implicit $v8 +... +--- +name: zext_nxv2i32_nxv2i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv2i32_nxv2i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_ZEXT [[COPY]](<vscale x 2 x s16>) + ; RV32-NEXT: $v8 = COPY [[ZEXT]](<vscale x 2 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv2i32_nxv2i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_ZEXT [[COPY]](<vscale x 2 x s16>) + ; RV64-NEXT: $v8 = COPY [[ZEXT]](<vscale x 2 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s16>) = COPY $v8 + %0:_(<vscale x 2 x s32>) = G_ZEXT %1(<vscale x 2 x s16>) + $v8 = COPY %0(<vscale x 2 x s32>) + PseudoRET implicit $v8 +... 
+--- +name: zext_nxv2i64_nxv2i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv2i64_nxv2i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s16>) + ; RV32-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: zext_nxv2i64_nxv2i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s16>) + ; RV64-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 2 x s16>) = COPY $v8 + %0:_(<vscale x 2 x s64>) = G_ZEXT %1(<vscale x 2 x s16>) + $v8m2 = COPY %0(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 +... +--- +name: zext_nxv4i32_nxv4i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv4i32_nxv4i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_ZEXT [[COPY]](<vscale x 4 x s16>) + ; RV32-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 4 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: zext_nxv4i32_nxv4i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_ZEXT [[COPY]](<vscale x 4 x s16>) + ; RV64-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 4 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 4 x s16>) = COPY $v8 + %0:_(<vscale x 4 x s32>) = G_ZEXT %1(<vscale x 4 x s16>) + $v8m2 = COPY %0(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 +... 
+--- +name: zext_nxv4i64_nxv4i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv4i64_nxv4i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_ZEXT [[COPY]](<vscale x 4 x s16>) + ; RV32-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 4 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: zext_nxv4i64_nxv4i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_ZEXT [[COPY]](<vscale x 4 x s16>) + ; RV64-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 4 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 4 x s16>) = COPY $v8 + %0:_(<vscale x 4 x s64>) = G_ZEXT %1(<vscale x 4 x s16>) + $v8m4 = COPY %0(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 +... +--- +name: zext_nxv8i32_nxv8i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv8i32_nxv8i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v8m2 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_ZEXT [[COPY]](<vscale x 8 x s16>) + ; RV32-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 8 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: zext_nxv8i32_nxv8i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v8m2 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_ZEXT [[COPY]](<vscale x 8 x s16>) + ; RV64-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 8 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 8 x s16>) = COPY $v8m2 + %0:_(<vscale x 8 x s32>) = G_ZEXT %1(<vscale x 8 x s16>) + $v8m4 = COPY %0(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 +... 
+--- +name: zext_nxv8i64_nxv8i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv8i64_nxv8i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v8m4 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_ZEXT [[COPY]](<vscale x 8 x s16>) + ; RV32-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 8 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: zext_nxv8i64_nxv8i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v8m4 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_ZEXT [[COPY]](<vscale x 8 x s16>) + ; RV64-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 8 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 8 x s16>) = COPY $v8m4 + %0:_(<vscale x 8 x s64>) = G_ZEXT %1(<vscale x 8 x s16>) + $v8m8 = COPY %0(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 +... +--- +name: zext_nxv16i32_nxv16i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv16i32_nxv16i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s16>) = COPY $v8m4 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_ZEXT [[COPY]](<vscale x 16 x s16>) + ; RV32-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 16 x s32>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: zext_nxv16i32_nxv16i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s16>) = COPY $v8m4 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_ZEXT [[COPY]](<vscale x 16 x s16>) + ; RV64-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 16 x s32>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 16 x s16>) = COPY $v8m4 + %0:_(<vscale x 16 x s32>) = G_ZEXT %1(<vscale x 16 x s16>) + $v8m8 = COPY %0(<vscale x 16 x s32>) + PseudoRET implicit $v8m8 +... 
+ +# Extend from s32 element vectors +--- +name: zext_nxv1i64_nxv1i32 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv1i64_nxv1i32 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s32>) = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s32>) + ; RV32-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv1i64_nxv1i32 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s32>) = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s32>) + ; RV64-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s32>) = COPY $v8 + %0:_(<vscale x 1 x s64>) = G_ZEXT %1(<vscale x 1 x s32>) + $v8 = COPY %0(<vscale x 1 x s64>) + PseudoRET implicit $v8 +... +--- +name: zext_nxv2i64_nxv2i32 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv2i64_nxv2i32 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s32>) = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s32>) + ; RV32-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: zext_nxv2i64_nxv2i32 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s32>) = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s32>) + ; RV64-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 2 x s32>) = COPY $v8 + %0:_(<vscale x 2 x s64>) = G_ZEXT %1(<vscale x 2 x s32>) + $v8m2 = COPY %0(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 +... 
+--- +name: zext_nxv4i64_nxv4i32 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv4i64_nxv4i32 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $v8m2 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_ZEXT [[COPY]](<vscale x 4 x s32>) + ; RV32-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 4 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: zext_nxv4i64_nxv4i32 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $v8m2 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_ZEXT [[COPY]](<vscale x 4 x s32>) + ; RV64-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 4 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 4 x s32>) = COPY $v8m2 + %0:_(<vscale x 4 x s64>) = G_ZEXT %1(<vscale x 4 x s32>) + $v8m4 = COPY %0(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 +... +--- +name: zext_nxv8i64_nxv8i32 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv8i64_nxv8i32 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s32>) = COPY $v8m4 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_ZEXT [[COPY]](<vscale x 8 x s32>) + ; RV32-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 8 x s64>) + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: zext_nxv8i64_nxv8i32 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s32>) = COPY $v8m4 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_ZEXT [[COPY]](<vscale x 8 x s32>) + ; RV64-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 8 x s64>) + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 8 x s32>) = COPY $v8m4 + %0:_(<vscale x 8 x s64>) = G_ZEXT %1(<vscale x 8 x s32>) + $v8m8 = COPY %0(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/anyext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/anyext.mir new file mode 100644 index 0000000..062179c --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/anyext.mir @@ -0,0 +1,820 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+m,+v -run-pass=regbankselect \ +# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \ +# RUN: -o - | FileCheck -check-prefix=RV32I %s +# RUN: llc -mtriple=riscv64 -mattr=+m,+v -run-pass=regbankselect \ +# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \ +# RUN: -o - | FileCheck -check-prefix=RV64I %s + +--- +name: anyext_nxv1i16_nxv1i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv1i16_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 1 x s16>) = G_ANYEXT [[COPY]](<vscale x 1 x s8>) + ; RV32I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s16>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv1i16_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 1 x s16>) = G_ANYEXT [[COPY]](<vscale x 1 x s8>) + ; RV64I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s16>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s8>) = COPY $v8 + %1:_(<vscale x 1 x s16>) = G_ANYEXT %0(<vscale x 1 x s8>) + $v8 = COPY %1(<vscale x 1 x s16>) + PseudoRET implicit $v8 + +... 
+--- +name: anyext_nxv1i32_nxv1i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv1i32_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_ANYEXT [[COPY]](<vscale x 1 x s8>) + ; RV32I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv1i32_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_ANYEXT [[COPY]](<vscale x 1 x s8>) + ; RV64I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s8>) = COPY $v8 + %1:_(<vscale x 1 x s32>) = G_ANYEXT %0(<vscale x 1 x s8>) + $v8 = COPY %1(<vscale x 1 x s32>) + PseudoRET implicit $v8 + +... +--- +name: anyext_nxv1i64_nxv1i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv1i64_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ANYEXT [[COPY]](<vscale x 1 x s8>) + ; RV32I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv1i64_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ANYEXT [[COPY]](<vscale x 1 x s8>) + ; RV64I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s8>) = COPY $v8 + %1:_(<vscale x 1 x s64>) = G_ANYEXT %0(<vscale x 1 x s8>) + $v8 = COPY %1(<vscale x 1 x s64>) + PseudoRET implicit $v8 + +... 
+--- +name: anyext_nxv2i16_nxv2i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv2i16_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 2 x s16>) = G_ANYEXT [[COPY]](<vscale x 2 x s8>) + ; RV32I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 2 x s16>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv2i16_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 2 x s16>) = G_ANYEXT [[COPY]](<vscale x 2 x s8>) + ; RV64I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 2 x s16>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 2 x s8>) = COPY $v8 + %1:_(<vscale x 2 x s16>) = G_ANYEXT %0(<vscale x 2 x s8>) + $v8 = COPY %1(<vscale x 2 x s16>) + PseudoRET implicit $v8 + +... +--- +name: anyext_nxv2i32_nxv2i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv2i32_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_ANYEXT [[COPY]](<vscale x 2 x s8>) + ; RV32I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 2 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv2i32_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_ANYEXT [[COPY]](<vscale x 2 x s8>) + ; RV64I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 2 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 2 x s8>) = COPY $v8 + %1:_(<vscale x 2 x s32>) = G_ANYEXT %0(<vscale x 2 x s8>) + $v8 = COPY %1(<vscale x 2 x s32>) + PseudoRET implicit $v8 + +... 
+--- +name: anyext_nxv2i64_nxv2i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv2i64_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ANYEXT [[COPY]](<vscale x 2 x s8>) + ; RV32I-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 2 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: anyext_nxv2i64_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ANYEXT [[COPY]](<vscale x 2 x s8>) + ; RV64I-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 2 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(<vscale x 2 x s8>) = COPY $v8 + %1:_(<vscale x 2 x s64>) = G_ANYEXT %0(<vscale x 2 x s8>) + $v8m2 = COPY %1(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 + +... +--- +name: anyext_nxv4i16_nxv4i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv4i16_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 4 x s16>) = G_ANYEXT [[COPY]](<vscale x 4 x s8>) + ; RV32I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 4 x s16>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv4i16_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 4 x s16>) = G_ANYEXT [[COPY]](<vscale x 4 x s8>) + ; RV64I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 4 x s16>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 4 x s8>) = COPY $v8 + %1:_(<vscale x 4 x s16>) = G_ANYEXT %0(<vscale x 4 x s8>) + $v8 = COPY %1(<vscale x 4 x s16>) + PseudoRET implicit $v8 + +... 
+--- +name: anyext_nxv4i32_nxv4i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv4i32_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_ANYEXT [[COPY]](<vscale x 4 x s8>) + ; RV32I-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 4 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: anyext_nxv4i32_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_ANYEXT [[COPY]](<vscale x 4 x s8>) + ; RV64I-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 4 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(<vscale x 4 x s8>) = COPY $v8 + %1:_(<vscale x 4 x s32>) = G_ANYEXT %0(<vscale x 4 x s8>) + $v8m2 = COPY %1(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 + +... 
+--- +name: anyext_nxv4i64_nxv4i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv4i64_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_ANYEXT [[COPY]](<vscale x 4 x s8>) + ; RV32I-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 4 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: anyext_nxv4i64_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_ANYEXT [[COPY]](<vscale x 4 x s8>) + ; RV64I-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 4 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_(<vscale x 4 x s8>) = COPY $v8 + %1:_(<vscale x 4 x s64>) = G_ANYEXT %0(<vscale x 4 x s8>) + $v8m4 = COPY %1(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 + +... 
+--- +name: anyext_nxv8i16_nxv8i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv8i16_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 8 x s16>) = G_ANYEXT [[COPY]](<vscale x 8 x s8>) + ; RV32I-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 8 x s16>) + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: anyext_nxv8i16_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 8 x s16>) = G_ANYEXT [[COPY]](<vscale x 8 x s8>) + ; RV64I-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 8 x s16>) + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(<vscale x 8 x s8>) = COPY $v8 + %1:_(<vscale x 8 x s16>) = G_ANYEXT %0(<vscale x 8 x s8>) + $v8m2 = COPY %1(<vscale x 8 x s16>) + PseudoRET implicit $v8m2 + +... 
+--- +name: anyext_nxv8i32_nxv8i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv8i32_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_ANYEXT [[COPY]](<vscale x 8 x s8>) + ; RV32I-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 8 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: anyext_nxv8i32_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_ANYEXT [[COPY]](<vscale x 8 x s8>) + ; RV64I-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 8 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_(<vscale x 8 x s8>) = COPY $v8 + %1:_(<vscale x 8 x s32>) = G_ANYEXT %0(<vscale x 8 x s8>) + $v8m4 = COPY %1(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 + +... 
+--- +name: anyext_nxv8i64_nxv8i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv8i64_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_ANYEXT [[COPY]](<vscale x 8 x s8>) + ; RV32I-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 8 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: anyext_nxv8i64_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_ANYEXT [[COPY]](<vscale x 8 x s8>) + ; RV64I-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 8 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_(<vscale x 8 x s8>) = COPY $v8 + %1:_(<vscale x 8 x s64>) = G_ANYEXT %0(<vscale x 8 x s8>) + $v8m8 = COPY %1(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 + +... 
+--- +name: anyext_nxv16i16_nxv16i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv16i16_nxv16i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s8>) = COPY $v8m2 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 16 x s16>) = G_ANYEXT [[COPY]](<vscale x 16 x s8>) + ; RV32I-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 16 x s16>) + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: anyext_nxv16i16_nxv16i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s8>) = COPY $v8m2 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 16 x s16>) = G_ANYEXT [[COPY]](<vscale x 16 x s8>) + ; RV64I-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 16 x s16>) + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_(<vscale x 16 x s8>) = COPY $v8m2 + %1:_(<vscale x 16 x s16>) = G_ANYEXT %0(<vscale x 16 x s8>) + $v8m4 = COPY %1(<vscale x 16 x s16>) + PseudoRET implicit $v8m4 + +... 
+--- +name: anyext_nxv16i32_nxv16i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv16i32_nxv16i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s8>) = COPY $v8m4 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_ANYEXT [[COPY]](<vscale x 16 x s8>) + ; RV32I-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 16 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: anyext_nxv16i32_nxv16i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s8>) = COPY $v8m4 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_ANYEXT [[COPY]](<vscale x 16 x s8>) + ; RV64I-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 16 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_(<vscale x 16 x s8>) = COPY $v8m4 + %1:_(<vscale x 16 x s32>) = G_ANYEXT %0(<vscale x 16 x s8>) + $v8m8 = COPY %1(<vscale x 16 x s32>) + PseudoRET implicit $v8m8 + +... 
+--- +name: anyext_nxv32i16_nxv32i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv32i16_nxv32i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 32 x s8>) = COPY $v8m4 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 32 x s16>) = G_ANYEXT [[COPY]](<vscale x 32 x s8>) + ; RV32I-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 32 x s16>) + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: anyext_nxv32i16_nxv32i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 32 x s8>) = COPY $v8m4 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 32 x s16>) = G_ANYEXT [[COPY]](<vscale x 32 x s8>) + ; RV64I-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 32 x s16>) + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_(<vscale x 32 x s8>) = COPY $v8m4 + %1:_(<vscale x 32 x s16>) = G_ANYEXT %0(<vscale x 32 x s8>) + $v8m8 = COPY %1(<vscale x 32 x s16>) + PseudoRET implicit $v8m8 + +... 
+--- +name: anyext_nxv1i32_nxv1i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv1i32_nxv1i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_ANYEXT [[COPY]](<vscale x 1 x s16>) + ; RV32I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv1i32_nxv1i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_ANYEXT [[COPY]](<vscale x 1 x s16>) + ; RV64I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s16>) = COPY $v8 + %1:_(<vscale x 1 x s32>) = G_ANYEXT %0(<vscale x 1 x s16>) + $v8 = COPY %1(<vscale x 1 x s32>) + PseudoRET implicit $v8 + +... 
+--- +name: anyext_nxv1i64_nxv1i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv1i64_nxv1i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ANYEXT [[COPY]](<vscale x 1 x s16>) + ; RV32I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv1i64_nxv1i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ANYEXT [[COPY]](<vscale x 1 x s16>) + ; RV64I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s16>) = COPY $v8 + %1:_(<vscale x 1 x s64>) = G_ANYEXT %0(<vscale x 1 x s16>) + $v8 = COPY %1(<vscale x 1 x s64>) + PseudoRET implicit $v8 + +... 
+--- +name: anyext_nxv2i32_nxv2i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv2i32_nxv2i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_ANYEXT [[COPY]](<vscale x 2 x s16>) + ; RV32I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 2 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv2i32_nxv2i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_ANYEXT [[COPY]](<vscale x 2 x s16>) + ; RV64I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 2 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 2 x s16>) = COPY $v8 + %1:_(<vscale x 2 x s32>) = G_ANYEXT %0(<vscale x 2 x s16>) + $v8 = COPY %1(<vscale x 2 x s32>) + PseudoRET implicit $v8 + +... 
+--- +name: anyext_nxv2i64_nxv2i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv2i64_nxv2i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ANYEXT [[COPY]](<vscale x 2 x s16>) + ; RV32I-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 2 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: anyext_nxv2i64_nxv2i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ANYEXT [[COPY]](<vscale x 2 x s16>) + ; RV64I-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 2 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(<vscale x 2 x s16>) = COPY $v8 + %1:_(<vscale x 2 x s64>) = G_ANYEXT %0(<vscale x 2 x s16>) + $v8m2 = COPY %1(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 + +... 
+--- +name: anyext_nxv4i32_nxv4i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv4i32_nxv4i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_ANYEXT [[COPY]](<vscale x 4 x s16>) + ; RV32I-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 4 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: anyext_nxv4i32_nxv4i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_ANYEXT [[COPY]](<vscale x 4 x s16>) + ; RV64I-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 4 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(<vscale x 4 x s16>) = COPY $v8 + %1:_(<vscale x 4 x s32>) = G_ANYEXT %0(<vscale x 4 x s16>) + $v8m2 = COPY %1(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 + +... 
+--- +name: anyext_nxv4i64_nxv4i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv4i64_nxv4i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_ANYEXT [[COPY]](<vscale x 4 x s16>) + ; RV32I-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 4 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: anyext_nxv4i64_nxv4i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_ANYEXT [[COPY]](<vscale x 4 x s16>) + ; RV64I-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 4 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_(<vscale x 4 x s16>) = COPY $v8 + %1:_(<vscale x 4 x s64>) = G_ANYEXT %0(<vscale x 4 x s16>) + $v8m4 = COPY %1(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 + +... 
+--- +name: anyext_nxv8i32_nxv8i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv8i32_nxv8i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m2 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_ANYEXT [[COPY]](<vscale x 8 x s16>) + ; RV32I-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 8 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: anyext_nxv8i32_nxv8i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m2 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_ANYEXT [[COPY]](<vscale x 8 x s16>) + ; RV64I-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 8 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_(<vscale x 8 x s16>) = COPY $v8m2 + %1:_(<vscale x 8 x s32>) = G_ANYEXT %0(<vscale x 8 x s16>) + $v8m4 = COPY %1(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 + +... 
+--- +name: anyext_nxv8i64_nxv8i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv8i64_nxv8i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m2 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_ANYEXT [[COPY]](<vscale x 8 x s16>) + ; RV32I-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 8 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: anyext_nxv8i64_nxv8i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m2 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_ANYEXT [[COPY]](<vscale x 8 x s16>) + ; RV64I-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 8 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_(<vscale x 8 x s16>) = COPY $v8m2 + %1:_(<vscale x 8 x s64>) = G_ANYEXT %0(<vscale x 8 x s16>) + $v8m8 = COPY %1(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 + +... 
+--- +name: anyext_nxv16i32_nxv16i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv16i32_nxv16i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s16>) = COPY $v8m4 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_ANYEXT [[COPY]](<vscale x 16 x s16>) + ; RV32I-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 16 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: anyext_nxv16i32_nxv16i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s16>) = COPY $v8m4 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_ANYEXT [[COPY]](<vscale x 16 x s16>) + ; RV64I-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 16 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_(<vscale x 16 x s16>) = COPY $v8m4 + %1:_(<vscale x 16 x s32>) = G_ANYEXT %0(<vscale x 16 x s16>) + $v8m8 = COPY %1(<vscale x 16 x s32>) + PseudoRET implicit $v8m8 + +... 
+--- +name: anyext_nxv1i64_nxv1i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv1i64_nxv1i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s32>) = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ANYEXT [[COPY]](<vscale x 1 x s32>) + ; RV32I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv1i64_nxv1i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s32>) = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ANYEXT [[COPY]](<vscale x 1 x s32>) + ; RV64I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s32>) = COPY $v8 + %1:_(<vscale x 1 x s64>) = G_ANYEXT %0(<vscale x 1 x s32>) + $v8 = COPY %1(<vscale x 1 x s64>) + PseudoRET implicit $v8 + +... 
+--- +name: anyext_nxv2i64_nxv2i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv2i64_nxv2i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s32>) = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ANYEXT [[COPY]](<vscale x 2 x s32>) + ; RV32I-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 2 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: anyext_nxv2i64_nxv2i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s32>) = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ANYEXT [[COPY]](<vscale x 2 x s32>) + ; RV64I-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 2 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(<vscale x 2 x s32>) = COPY $v8 + %1:_(<vscale x 2 x s64>) = G_ANYEXT %0(<vscale x 2 x s32>) + $v8m2 = COPY %1(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 + +... 
+--- +name: anyext_nxv4i64_nxv4i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv4i64_nxv4i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s32>) = COPY $v8m2 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_ANYEXT [[COPY]](<vscale x 4 x s32>) + ; RV32I-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 4 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: anyext_nxv4i64_nxv4i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s32>) = COPY $v8m2 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_ANYEXT [[COPY]](<vscale x 4 x s32>) + ; RV64I-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 4 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_(<vscale x 4 x s32>) = COPY $v8m2 + %1:_(<vscale x 4 x s64>) = G_ANYEXT %0(<vscale x 4 x s32>) + $v8m4 = COPY %1(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 + +... 
+--- +name: anyext_nxv8i64_nxv8i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv8i64_nxv8i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s32>) = COPY $v8m4 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_ANYEXT [[COPY]](<vscale x 8 x s32>) + ; RV32I-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 8 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: anyext_nxv8i64_nxv8i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s32>) = COPY $v8m4 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_ANYEXT [[COPY]](<vscale x 8 x s32>) + ; RV64I-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 8 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_(<vscale x 8 x s32>) = COPY $v8m4 + %1:_(<vscale x 8 x s64>) = G_ANYEXT %0(<vscale x 8 x s32>) + $v8m8 = COPY %1(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 + +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/icmp.mir b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/icmp.mir new file mode 100644 index 0000000..925d6ae --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/icmp.mir @@ -0,0 +1,675 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+m,+v -run-pass=regbankselect \ +# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \ +# RUN: -o - | FileCheck -check-prefix=RV32I %s +# RUN: llc -mtriple=riscv64 -mattr=+m,+v -run-pass=regbankselect \ +# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \ +# RUN: -o - | FileCheck -check-prefix=RV64I %s + +--- +name: icmp_nxv1i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv1i1 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 1 x s1>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s1>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv1i1 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 1 x s1>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s1>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF + %1:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 1 x s1>), %0 + $v8 = COPY %1(<vscale x 1 x s1>) + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv2i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv2i1 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 2 x s1>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s1>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv2i1 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 2 x s1>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s1>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF + %1:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 2 x s1>), %0 + $v8 = COPY %1(<vscale x 2 x s1>) + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv4i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv4i1 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 4 x s1>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s1>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv4i1 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 4 x s1>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s1>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF + %1:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 4 x s1>), %0 + $v8 = COPY %1(<vscale x 4 x s1>) + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv8i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv8i1 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 8 x s1>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s1>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv8i1 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 8 x s1>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s1>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF + %1:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 8 x s1>), %0 + $v8 = COPY %1(<vscale x 8 x s1>) + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv16i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv16i1 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 16 x s1>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s1>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv16i1 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 16 x s1>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s1>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF + %1:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 16 x s1>), %0 + $v8 = COPY %1(<vscale x 16 x s1>) + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv32i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv32i1 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 32 x s1>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 32 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 32 x s1>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 32 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv32i1 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 32 x s1>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 32 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 32 x s1>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 32 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 32 x s1>) = G_IMPLICIT_DEF + %1:_(<vscale x 32 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 32 x s1>), %0 + $v8 = COPY %1(<vscale x 32 x s1>) + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv64i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv64i1 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 64 x s1>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 64 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 64 x s1>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 64 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv64i1 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 64 x s1>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 64 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 64 x s1>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 64 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF + %1:_(<vscale x 64 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 64 x s1>), %0 + $v8 = COPY %1(<vscale x 64 x s1>) + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv1i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv1i8 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 1 x s8>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s8>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv1i8 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 1 x s8>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s8>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF + %1:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 1 x s8>), %0 + $v8 = COPY %1(<vscale x 1 x s1>) + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv2i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv2i8 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 2 x s8>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s8>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv2i8 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 2 x s8>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s8>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF + %1:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 2 x s8>), %0 + $v8 = COPY %1(<vscale x 2 x s1>) + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv4i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv4i8 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 4 x s8>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s8>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv4i8 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 4 x s8>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s8>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF + %1:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 4 x s8>), %0 + $v8 = COPY %1(<vscale x 4 x s1>) + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv8i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv8i8 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 8 x s8>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s8>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv8i8 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 8 x s8>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s8>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF + %1:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 8 x s8>), %0 + $v8 = COPY %1(<vscale x 8 x s1>) + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv16i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv16i8 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 16 x s8>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s8>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv16i8 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 16 x s8>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s8>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF + %1:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 16 x s8>), %0 + $v8 = COPY %1(<vscale x 16 x s1>) + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv32i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv32i8 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 32 x s8>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 32 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 32 x s8>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 32 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv32i8 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 32 x s8>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 32 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 32 x s8>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 32 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 32 x s8>) = G_IMPLICIT_DEF + %1:_(<vscale x 32 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 32 x s8>), %0 + $v8 = COPY %1(<vscale x 32 x s1>) + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv64i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv64i8 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 64 x s8>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 64 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 64 x s8>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 64 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv64i8 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 64 x s8>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 64 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 64 x s8>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 64 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 64 x s8>) = G_IMPLICIT_DEF + %1:_(<vscale x 64 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 64 x s8>), %0 + $v8 = COPY %1(<vscale x 64 x s1>) + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv1i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv1i16 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 1 x s16>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s16>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv1i16 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 1 x s16>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s16>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF + %1:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 1 x s16>), %0 + $v8 = COPY %1(<vscale x 1 x s1>) + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv2i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv2i16 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 2 x s16>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s16>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv2i16 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 2 x s16>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s16>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF + %1:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 2 x s16>), %0 + $v8 = COPY %1(<vscale x 2 x s1>) + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv4i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv4i16 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 4 x s16>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s16>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv4i16 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 4 x s16>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s16>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF + %1:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 4 x s16>), %0 + $v8 = COPY %1(<vscale x 4 x s1>) + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv8i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv8i16 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 8 x s16>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s16>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv8i16 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 8 x s16>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s16>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF + %1:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 8 x s16>), %0 + $v8 = COPY %1(<vscale x 8 x s1>) + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv16i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv16i16 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 16 x s16>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s16>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv16i16 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 16 x s16>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s16>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF + %1:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 16 x s16>), %0 + $v8 = COPY %1(<vscale x 16 x s1>) + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv32i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv32i16 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 32 x s16>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 32 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 32 x s16>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 32 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv32i16 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 32 x s16>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 32 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 32 x s16>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 32 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 32 x s16>) = G_IMPLICIT_DEF + %1:_(<vscale x 32 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 32 x s16>), %0 + $v8 = COPY %1(<vscale x 32 x s1>) + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv1i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv1i32 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s32>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv1i32 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s32>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF + %1:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 1 x s32>), %0 + $v8 = COPY %1(<vscale x 1 x s1>) + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv2i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv2i32 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s32>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv2i32 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s32>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF + %1:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 2 x s32>), %0 + $v8 = COPY %1(<vscale x 2 x s1>) + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv4i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv4i32 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s32>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv4i32 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s32>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF + %1:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 4 x s32>), %0 + $v8 = COPY %1(<vscale x 4 x s1>) + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv8i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv8i32 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s32>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv8i32 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s32>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF + %1:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 8 x s32>), %0 + $v8 = COPY %1(<vscale x 8 x s1>) + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv16i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv16i32 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s32>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv16i32 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s32>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF + %1:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 16 x s32>), %0 + $v8 = COPY %1(<vscale x 16 x s1>) + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv1i64 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv1i64 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s64>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv1i64 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s64>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF + %1:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 1 x s64>), %0 + $v8 = COPY %1(<vscale x 1 x s1>) + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv2i64 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv2i64 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s64>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv2i64 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s64>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF + %1:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 2 x s64>), %0 + $v8 = COPY %1(<vscale x 2 x s1>) + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv4i64 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv4i64 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s64>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv4i64 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s64>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF + %1:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 4 x s64>), %0 + $v8 = COPY %1(<vscale x 4 x s1>) + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv8i64 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv8i64 + ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s64>), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv8i64 + ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s64>), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF + %1:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 8 x s64>), %0 + $v8 = COPY %1(<vscale x 8 x s1>) + PseudoRET implicit $v8 + +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/sext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/sext.mir new file mode 100644 index 0000000..a754b8b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/sext.mir @@ -0,0 +1,820 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+m,+v -run-pass=regbankselect \ +# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \ +# RUN: -o - | FileCheck -check-prefix=RV32I %s +# RUN: llc -mtriple=riscv64 -mattr=+m,+v -run-pass=regbankselect \ +# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \ +# RUN: -o - | FileCheck -check-prefix=RV64I %s + +--- +name: sext_nxv1i16_nxv1i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv1i16_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 1 x s16>) = G_SEXT [[COPY]](<vscale x 1 x s8>) + ; RV32I-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s16>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv1i16_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 1 x s16>) = G_SEXT [[COPY]](<vscale x 1 x s8>) + ; RV64I-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s16>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s8>) = COPY $v8 + %1:_(<vscale x 1 x s16>) = G_SEXT %0(<vscale x 1 x s8>) + $v8 = COPY %1(<vscale x 1 x s16>) + PseudoRET implicit $v8 + +... 
+--- +name: sext_nxv1i32_nxv1i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv1i32_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_SEXT [[COPY]](<vscale x 1 x s8>) + ; RV32I-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv1i32_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_SEXT [[COPY]](<vscale x 1 x s8>) + ; RV64I-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s8>) = COPY $v8 + %1:_(<vscale x 1 x s32>) = G_SEXT %0(<vscale x 1 x s8>) + $v8 = COPY %1(<vscale x 1 x s32>) + PseudoRET implicit $v8 + +... +--- +name: sext_nxv1i64_nxv1i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv1i64_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_SEXT [[COPY]](<vscale x 1 x s8>) + ; RV32I-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv1i64_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_SEXT [[COPY]](<vscale x 1 x s8>) + ; RV64I-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s8>) = COPY $v8 + %1:_(<vscale x 1 x s64>) = G_SEXT %0(<vscale x 1 x s8>) + $v8 = COPY %1(<vscale x 1 x s64>) + PseudoRET implicit $v8 + +... 
+--- +name: sext_nxv2i16_nxv2i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv2i16_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 2 x s16>) = G_SEXT [[COPY]](<vscale x 2 x s8>) + ; RV32I-NEXT: $v8 = COPY [[SEXT]](<vscale x 2 x s16>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv2i16_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 2 x s16>) = G_SEXT [[COPY]](<vscale x 2 x s8>) + ; RV64I-NEXT: $v8 = COPY [[SEXT]](<vscale x 2 x s16>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 2 x s8>) = COPY $v8 + %1:_(<vscale x 2 x s16>) = G_SEXT %0(<vscale x 2 x s8>) + $v8 = COPY %1(<vscale x 2 x s16>) + PseudoRET implicit $v8 + +... +--- +name: sext_nxv2i32_nxv2i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv2i32_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_SEXT [[COPY]](<vscale x 2 x s8>) + ; RV32I-NEXT: $v8 = COPY [[SEXT]](<vscale x 2 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv2i32_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_SEXT [[COPY]](<vscale x 2 x s8>) + ; RV64I-NEXT: $v8 = COPY [[SEXT]](<vscale x 2 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 2 x s8>) = COPY $v8 + %1:_(<vscale x 2 x s32>) = G_SEXT %0(<vscale x 2 x s8>) + $v8 = COPY %1(<vscale x 2 x s32>) + PseudoRET implicit $v8 + +... 
+--- +name: sext_nxv2i64_nxv2i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv2i64_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_SEXT [[COPY]](<vscale x 2 x s8>) + ; RV32I-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 2 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: sext_nxv2i64_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_SEXT [[COPY]](<vscale x 2 x s8>) + ; RV64I-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 2 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(<vscale x 2 x s8>) = COPY $v8 + %1:_(<vscale x 2 x s64>) = G_SEXT %0(<vscale x 2 x s8>) + $v8m2 = COPY %1(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 + +... +--- +name: sext_nxv4i16_nxv4i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv4i16_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 4 x s16>) = G_SEXT [[COPY]](<vscale x 4 x s8>) + ; RV32I-NEXT: $v8 = COPY [[SEXT]](<vscale x 4 x s16>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv4i16_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 4 x s16>) = G_SEXT [[COPY]](<vscale x 4 x s8>) + ; RV64I-NEXT: $v8 = COPY [[SEXT]](<vscale x 4 x s16>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 4 x s8>) = COPY $v8 + %1:_(<vscale x 4 x s16>) = G_SEXT %0(<vscale x 4 x s8>) + $v8 = COPY %1(<vscale x 4 x s16>) + PseudoRET implicit $v8 + +... 
+--- +name: sext_nxv4i32_nxv4i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv4i32_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_SEXT [[COPY]](<vscale x 4 x s8>) + ; RV32I-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 4 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: sext_nxv4i32_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_SEXT [[COPY]](<vscale x 4 x s8>) + ; RV64I-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 4 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(<vscale x 4 x s8>) = COPY $v8 + %1:_(<vscale x 4 x s32>) = G_SEXT %0(<vscale x 4 x s8>) + $v8m2 = COPY %1(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 + +... +--- +name: sext_nxv4i64_nxv4i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv4i64_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_SEXT [[COPY]](<vscale x 4 x s8>) + ; RV32I-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 4 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: sext_nxv4i64_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_SEXT [[COPY]](<vscale x 4 x s8>) + ; RV64I-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 4 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_(<vscale x 4 x s8>) = COPY $v8 + %1:_(<vscale x 4 x s64>) = G_SEXT %0(<vscale x 4 x s8>) + $v8m4 = COPY %1(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 + +... 
+--- +name: sext_nxv8i16_nxv8i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv8i16_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 8 x s16>) = G_SEXT [[COPY]](<vscale x 8 x s8>) + ; RV32I-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 8 x s16>) + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: sext_nxv8i16_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 8 x s16>) = G_SEXT [[COPY]](<vscale x 8 x s8>) + ; RV64I-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 8 x s16>) + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(<vscale x 8 x s8>) = COPY $v8 + %1:_(<vscale x 8 x s16>) = G_SEXT %0(<vscale x 8 x s8>) + $v8m2 = COPY %1(<vscale x 8 x s16>) + PseudoRET implicit $v8m2 + +... +--- +name: sext_nxv8i32_nxv8i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv8i32_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_SEXT [[COPY]](<vscale x 8 x s8>) + ; RV32I-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 8 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: sext_nxv8i32_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_SEXT [[COPY]](<vscale x 8 x s8>) + ; RV64I-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 8 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_(<vscale x 8 x s8>) = COPY $v8 + %1:_(<vscale x 8 x s32>) = G_SEXT %0(<vscale x 8 x s8>) + $v8m4 = COPY %1(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 + +... 
+--- +name: sext_nxv8i64_nxv8i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv8i64_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_SEXT [[COPY]](<vscale x 8 x s8>) + ; RV32I-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 8 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: sext_nxv8i64_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_SEXT [[COPY]](<vscale x 8 x s8>) + ; RV64I-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 8 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_(<vscale x 8 x s8>) = COPY $v8 + %1:_(<vscale x 8 x s64>) = G_SEXT %0(<vscale x 8 x s8>) + $v8m8 = COPY %1(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 + +... +--- +name: sext_nxv16i16_nxv16i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv16i16_nxv16i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s8>) = COPY $v8m2 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 16 x s16>) = G_SEXT [[COPY]](<vscale x 16 x s8>) + ; RV32I-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 16 x s16>) + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: sext_nxv16i16_nxv16i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s8>) = COPY $v8m2 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 16 x s16>) = G_SEXT [[COPY]](<vscale x 16 x s8>) + ; RV64I-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 16 x s16>) + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_(<vscale x 16 x s8>) = COPY $v8m2 + %1:_(<vscale x 16 x s16>) = G_SEXT %0(<vscale x 16 x s8>) + $v8m4 = COPY %1(<vscale x 16 x s16>) + PseudoRET implicit $v8m4 + +... 
+--- +name: sext_nxv16i32_nxv16i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv16i32_nxv16i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s8>) = COPY $v8m2 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_SEXT [[COPY]](<vscale x 16 x s8>) + ; RV32I-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 16 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: sext_nxv16i32_nxv16i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s8>) = COPY $v8m2 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_SEXT [[COPY]](<vscale x 16 x s8>) + ; RV64I-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 16 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_(<vscale x 16 x s8>) = COPY $v8m2 + %1:_(<vscale x 16 x s32>) = G_SEXT %0(<vscale x 16 x s8>) + $v8m8 = COPY %1(<vscale x 16 x s32>) + PseudoRET implicit $v8m8 + +... 
+--- +name: sext_nxv32i16_nxv32i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv32i16_nxv32i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 32 x s8>) = COPY $v8m4 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 32 x s16>) = G_SEXT [[COPY]](<vscale x 32 x s8>) + ; RV32I-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 32 x s16>) + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: sext_nxv32i16_nxv32i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 32 x s8>) = COPY $v8m4 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 32 x s16>) = G_SEXT [[COPY]](<vscale x 32 x s8>) + ; RV64I-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 32 x s16>) + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_(<vscale x 32 x s8>) = COPY $v8m4 + %1:_(<vscale x 32 x s16>) = G_SEXT %0(<vscale x 32 x s8>) + $v8m8 = COPY %1(<vscale x 32 x s16>) + PseudoRET implicit $v8m8 + +... +--- +name: sext_nxv1i32_nxv1i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv1i32_nxv1i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_SEXT [[COPY]](<vscale x 1 x s16>) + ; RV32I-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv1i32_nxv1i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_SEXT [[COPY]](<vscale x 1 x s16>) + ; RV64I-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s16>) = COPY $v8 + %1:_(<vscale x 1 x s32>) = G_SEXT %0(<vscale x 1 x s16>) + $v8 = COPY %1(<vscale x 1 x s32>) + PseudoRET implicit $v8 + +... 
+--- +name: sext_nxv1i64_nxv1i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv1i64_nxv1i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_SEXT [[COPY]](<vscale x 1 x s16>) + ; RV32I-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv1i64_nxv1i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_SEXT [[COPY]](<vscale x 1 x s16>) + ; RV64I-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s16>) = COPY $v8 + %1:_(<vscale x 1 x s64>) = G_SEXT %0(<vscale x 1 x s16>) + $v8 = COPY %1(<vscale x 1 x s64>) + PseudoRET implicit $v8 + +... +--- +name: sext_nxv2i32_nxv2i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv2i32_nxv2i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_SEXT [[COPY]](<vscale x 2 x s16>) + ; RV32I-NEXT: $v8 = COPY [[SEXT]](<vscale x 2 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv2i32_nxv2i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_SEXT [[COPY]](<vscale x 2 x s16>) + ; RV64I-NEXT: $v8 = COPY [[SEXT]](<vscale x 2 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 2 x s16>) = COPY $v8 + %1:_(<vscale x 2 x s32>) = G_SEXT %0(<vscale x 2 x s16>) + $v8 = COPY %1(<vscale x 2 x s32>) + PseudoRET implicit $v8 + +... 
+--- +name: sext_nxv2i64_nxv2i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv2i64_nxv2i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_SEXT [[COPY]](<vscale x 2 x s16>) + ; RV32I-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 2 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: sext_nxv2i64_nxv2i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_SEXT [[COPY]](<vscale x 2 x s16>) + ; RV64I-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 2 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(<vscale x 2 x s16>) = COPY $v8 + %1:_(<vscale x 2 x s64>) = G_SEXT %0(<vscale x 2 x s16>) + $v8m2 = COPY %1(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 + +... +--- +name: sext_nxv4i32_nxv4i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv4i32_nxv4i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_SEXT [[COPY]](<vscale x 4 x s16>) + ; RV32I-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 4 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: sext_nxv4i32_nxv4i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_SEXT [[COPY]](<vscale x 4 x s16>) + ; RV64I-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 4 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(<vscale x 4 x s16>) = COPY $v8 + %1:_(<vscale x 4 x s32>) = G_SEXT %0(<vscale x 4 x s16>) + $v8m2 = COPY %1(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 + +... 
+--- +name: sext_nxv4i64_nxv4i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv4i64_nxv4i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_SEXT [[COPY]](<vscale x 4 x s16>) + ; RV32I-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 4 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: sext_nxv4i64_nxv4i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_SEXT [[COPY]](<vscale x 4 x s16>) + ; RV64I-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 4 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_(<vscale x 4 x s16>) = COPY $v8 + %1:_(<vscale x 4 x s64>) = G_SEXT %0(<vscale x 4 x s16>) + $v8m4 = COPY %1(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 + +... +--- +name: sext_nxv8i32_nxv8i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv8i32_nxv8i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m2 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_SEXT [[COPY]](<vscale x 8 x s16>) + ; RV32I-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 8 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: sext_nxv8i32_nxv8i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m2 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_SEXT [[COPY]](<vscale x 8 x s16>) + ; RV64I-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 8 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_(<vscale x 8 x s16>) = COPY $v8m2 + %1:_(<vscale x 8 x s32>) = G_SEXT %0(<vscale x 8 x s16>) + $v8m4 = COPY %1(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 + +... 
+--- +name: sext_nxv8i64_nxv8i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv8i64_nxv8i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m2 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_SEXT [[COPY]](<vscale x 8 x s16>) + ; RV32I-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 8 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: sext_nxv8i64_nxv8i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m2 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_SEXT [[COPY]](<vscale x 8 x s16>) + ; RV64I-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 8 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_(<vscale x 8 x s16>) = COPY $v8m2 + %1:_(<vscale x 8 x s64>) = G_SEXT %0(<vscale x 8 x s16>) + $v8m8 = COPY %1(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 + +... +--- +name: sext_nxv16i32_nxv16i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv16i32_nxv16i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s16>) = COPY $v8m4 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_SEXT [[COPY]](<vscale x 16 x s16>) + ; RV32I-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 16 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: sext_nxv16i32_nxv16i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s16>) = COPY $v8m4 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_SEXT [[COPY]](<vscale x 16 x s16>) + ; RV64I-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 16 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_(<vscale x 16 x s16>) = COPY $v8m4 + %1:_(<vscale x 16 x s32>) = G_SEXT %0(<vscale x 16 x s16>) + $v8m8 = COPY %1(<vscale x 16 x s32>) + PseudoRET 
implicit $v8m8 + +... +--- +name: sext_nxv1i64_nxv1i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv1i64_nxv1i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s32>) = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_SEXT [[COPY]](<vscale x 1 x s32>) + ; RV32I-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv1i64_nxv1i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s32>) = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_SEXT [[COPY]](<vscale x 1 x s32>) + ; RV64I-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s32>) = COPY $v8 + %1:_(<vscale x 1 x s64>) = G_SEXT %0(<vscale x 1 x s32>) + $v8 = COPY %1(<vscale x 1 x s64>) + PseudoRET implicit $v8 + +... +--- +name: sext_nxv2i64_nxv2i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv2i64_nxv2i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s32>) = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_SEXT [[COPY]](<vscale x 2 x s32>) + ; RV32I-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 2 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: sext_nxv2i64_nxv2i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s32>) = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_SEXT [[COPY]](<vscale x 2 x s32>) + ; RV64I-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 2 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(<vscale x 2 x s32>) = COPY $v8 + %1:_(<vscale x 2 x s64>) = G_SEXT %0(<vscale x 2 x s32>) + $v8m2 = COPY %1(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 + +... 
+--- +name: sext_nxv4i64_nxv4i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv4i64_nxv4i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s32>) = COPY $v8m2 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_SEXT [[COPY]](<vscale x 4 x s32>) + ; RV32I-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 4 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: sext_nxv4i64_nxv4i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s32>) = COPY $v8m2 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_SEXT [[COPY]](<vscale x 4 x s32>) + ; RV64I-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 4 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_(<vscale x 4 x s32>) = COPY $v8m2 + %1:_(<vscale x 4 x s64>) = G_SEXT %0(<vscale x 4 x s32>) + $v8m4 = COPY %1(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 + +... +--- +name: sext_nxv8i64_nxv8i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv8i64_nxv8i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s32>) = COPY $v8m4 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_SEXT [[COPY]](<vscale x 8 x s32>) + ; RV32I-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 8 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: sext_nxv8i64_nxv8i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s32>) = COPY $v8m4 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_SEXT [[COPY]](<vscale x 8 x s32>) + ; RV64I-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 8 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_(<vscale x 8 x s32>) = COPY $v8m4 + %1:_(<vscale x 8 x s64>) = G_SEXT %0(<vscale x 8 x s32>) + $v8m8 = COPY %1(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 + +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/zext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/zext.mir new file mode 100644 index 0000000..c3bc4a9 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/zext.mir @@ -0,0 +1,820 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+m,+v -run-pass=regbankselect \ +# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \ +# RUN: -o - | FileCheck -check-prefix=RV32I %s +# RUN: llc -mtriple=riscv64 -mattr=+m,+v -run-pass=regbankselect \ +# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \ +# RUN: -o - | FileCheck -check-prefix=RV64I %s + +--- +name: zext_nxv1i16_nxv1i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv1i16_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s16>) = G_ZEXT [[COPY]](<vscale x 1 x s8>) + ; RV32I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s16>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv1i16_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s16>) = G_ZEXT [[COPY]](<vscale x 1 x s8>) + ; RV64I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s16>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s8>) = COPY $v8 + %1:_(<vscale x 1 x s16>) = G_ZEXT %0(<vscale x 1 x s8>) + $v8 = COPY %1(<vscale x 1 x s16>) + PseudoRET implicit $v8 + +... 
+--- +name: zext_nxv1i32_nxv1i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv1i32_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_ZEXT [[COPY]](<vscale x 1 x s8>) + ; RV32I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv1i32_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_ZEXT [[COPY]](<vscale x 1 x s8>) + ; RV64I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s8>) = COPY $v8 + %1:_(<vscale x 1 x s32>) = G_ZEXT %0(<vscale x 1 x s8>) + $v8 = COPY %1(<vscale x 1 x s32>) + PseudoRET implicit $v8 + +... +--- +name: zext_nxv1i64_nxv1i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv1i64_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s8>) + ; RV32I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv1i64_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s8>) + ; RV64I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s8>) = COPY $v8 + %1:_(<vscale x 1 x s64>) = G_ZEXT %0(<vscale x 1 x s8>) + $v8 = COPY %1(<vscale x 1 x s64>) + PseudoRET implicit $v8 + +... 
+--- +name: zext_nxv2i16_nxv2i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv2i16_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s16>) = G_ZEXT [[COPY]](<vscale x 2 x s8>) + ; RV32I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 2 x s16>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv2i16_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s16>) = G_ZEXT [[COPY]](<vscale x 2 x s8>) + ; RV64I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 2 x s16>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 2 x s8>) = COPY $v8 + %1:_(<vscale x 2 x s16>) = G_ZEXT %0(<vscale x 2 x s8>) + $v8 = COPY %1(<vscale x 2 x s16>) + PseudoRET implicit $v8 + +... +--- +name: zext_nxv2i32_nxv2i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv2i32_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_ZEXT [[COPY]](<vscale x 2 x s8>) + ; RV32I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 2 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv2i32_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_ZEXT [[COPY]](<vscale x 2 x s8>) + ; RV64I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 2 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 2 x s8>) = COPY $v8 + %1:_(<vscale x 2 x s32>) = G_ZEXT %0(<vscale x 2 x s8>) + $v8 = COPY %1(<vscale x 2 x s32>) + PseudoRET implicit $v8 + +... 
+--- +name: zext_nxv2i64_nxv2i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv2i64_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s8>) + ; RV32I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: zext_nxv2i64_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s8>) + ; RV64I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(<vscale x 2 x s8>) = COPY $v8 + %1:_(<vscale x 2 x s64>) = G_ZEXT %0(<vscale x 2 x s8>) + $v8m2 = COPY %1(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 + +... +--- +name: zext_nxv4i16_nxv4i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv4i16_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s16>) = G_ZEXT [[COPY]](<vscale x 4 x s8>) + ; RV32I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 4 x s16>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv4i16_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s16>) = G_ZEXT [[COPY]](<vscale x 4 x s8>) + ; RV64I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 4 x s16>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 4 x s8>) = COPY $v8 + %1:_(<vscale x 4 x s16>) = G_ZEXT %0(<vscale x 4 x s8>) + $v8 = COPY %1(<vscale x 4 x s16>) + PseudoRET implicit $v8 + +... 
+--- +name: zext_nxv4i32_nxv4i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv4i32_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_ZEXT [[COPY]](<vscale x 4 x s8>) + ; RV32I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 4 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: zext_nxv4i32_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_ZEXT [[COPY]](<vscale x 4 x s8>) + ; RV64I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 4 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(<vscale x 4 x s8>) = COPY $v8 + %1:_(<vscale x 4 x s32>) = G_ZEXT %0(<vscale x 4 x s8>) + $v8m2 = COPY %1(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 + +... +--- +name: zext_nxv4i64_nxv4i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv4i64_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_ZEXT [[COPY]](<vscale x 4 x s8>) + ; RV32I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 4 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: zext_nxv4i64_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_ZEXT [[COPY]](<vscale x 4 x s8>) + ; RV64I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 4 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_(<vscale x 4 x s8>) = COPY $v8 + %1:_(<vscale x 4 x s64>) = G_ZEXT %0(<vscale x 4 x s8>) + $v8m4 = COPY %1(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 + +... 
+--- +name: zext_nxv8i16_nxv8i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv8i16_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s16>) = G_ZEXT [[COPY]](<vscale x 8 x s8>) + ; RV32I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 8 x s16>) + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: zext_nxv8i16_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s16>) = G_ZEXT [[COPY]](<vscale x 8 x s8>) + ; RV64I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 8 x s16>) + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(<vscale x 8 x s8>) = COPY $v8 + %1:_(<vscale x 8 x s16>) = G_ZEXT %0(<vscale x 8 x s8>) + $v8m2 = COPY %1(<vscale x 8 x s16>) + PseudoRET implicit $v8m2 + +... +--- +name: zext_nxv8i32_nxv8i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv8i32_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_ZEXT [[COPY]](<vscale x 8 x s8>) + ; RV32I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 8 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: zext_nxv8i32_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_ZEXT [[COPY]](<vscale x 8 x s8>) + ; RV64I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 8 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_(<vscale x 8 x s8>) = COPY $v8 + %1:_(<vscale x 8 x s32>) = G_ZEXT %0(<vscale x 8 x s8>) + $v8m4 = COPY %1(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 + +... 
+--- +name: zext_nxv8i64_nxv8i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv8i64_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_ZEXT [[COPY]](<vscale x 8 x s8>) + ; RV32I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 8 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: zext_nxv8i64_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_ZEXT [[COPY]](<vscale x 8 x s8>) + ; RV64I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 8 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_(<vscale x 8 x s8>) = COPY $v8 + %1:_(<vscale x 8 x s64>) = G_ZEXT %0(<vscale x 8 x s8>) + $v8m8 = COPY %1(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 + +... +--- +name: zext_nxv16i16_nxv16i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv16i16_nxv16i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s8>) = COPY $v8m2 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 16 x s16>) = G_ZEXT [[COPY]](<vscale x 16 x s8>) + ; RV32I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 16 x s16>) + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: zext_nxv16i16_nxv16i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s8>) = COPY $v8m2 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 16 x s16>) = G_ZEXT [[COPY]](<vscale x 16 x s8>) + ; RV64I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 16 x s16>) + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_(<vscale x 16 x s8>) = COPY $v8m2 + %1:_(<vscale x 16 x s16>) = G_ZEXT %0(<vscale x 16 x s8>) + $v8m4 = COPY %1(<vscale x 16 x s16>) + PseudoRET implicit $v8m4 + +... 
+--- +name: zext_nxv16i32_nxv16i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv16i32_nxv16i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s8>) = COPY $v8m2 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_ZEXT [[COPY]](<vscale x 16 x s8>) + ; RV32I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 16 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: zext_nxv16i32_nxv16i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s8>) = COPY $v8m2 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_ZEXT [[COPY]](<vscale x 16 x s8>) + ; RV64I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 16 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_(<vscale x 16 x s8>) = COPY $v8m2 + %1:_(<vscale x 16 x s32>) = G_ZEXT %0(<vscale x 16 x s8>) + $v8m8 = COPY %1(<vscale x 16 x s32>) + PseudoRET implicit $v8m8 + +... 
+--- +name: zext_nxv32i16_nxv32i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv32i16_nxv32i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 32 x s8>) = COPY $v8m4 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 32 x s16>) = G_ZEXT [[COPY]](<vscale x 32 x s8>) + ; RV32I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 32 x s16>) + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: zext_nxv32i16_nxv32i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 32 x s8>) = COPY $v8m4 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 32 x s16>) = G_ZEXT [[COPY]](<vscale x 32 x s8>) + ; RV64I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 32 x s16>) + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_(<vscale x 32 x s8>) = COPY $v8m4 + %1:_(<vscale x 32 x s16>) = G_ZEXT %0(<vscale x 32 x s8>) + $v8m8 = COPY %1(<vscale x 32 x s16>) + PseudoRET implicit $v8m8 + +... +--- +name: zext_nxv1i32_nxv1i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv1i32_nxv1i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_ZEXT [[COPY]](<vscale x 1 x s16>) + ; RV32I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv1i32_nxv1i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_ZEXT [[COPY]](<vscale x 1 x s16>) + ; RV64I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s16>) = COPY $v8 + %1:_(<vscale x 1 x s32>) = G_ZEXT %0(<vscale x 1 x s16>) + $v8 = COPY %1(<vscale x 1 x s32>) + PseudoRET implicit $v8 + +... 
+--- +name: zext_nxv1i64_nxv1i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv1i64_nxv1i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s16>) + ; RV32I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv1i64_nxv1i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s16>) + ; RV64I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s16>) = COPY $v8 + %1:_(<vscale x 1 x s64>) = G_ZEXT %0(<vscale x 1 x s16>) + $v8 = COPY %1(<vscale x 1 x s64>) + PseudoRET implicit $v8 + +... +--- +name: zext_nxv2i32_nxv2i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv2i32_nxv2i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_ZEXT [[COPY]](<vscale x 2 x s16>) + ; RV32I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 2 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv2i32_nxv2i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_ZEXT [[COPY]](<vscale x 2 x s16>) + ; RV64I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 2 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 2 x s16>) = COPY $v8 + %1:_(<vscale x 2 x s32>) = G_ZEXT %0(<vscale x 2 x s16>) + $v8 = COPY %1(<vscale x 2 x s32>) + PseudoRET implicit $v8 + +... 
+--- +name: zext_nxv2i64_nxv2i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv2i64_nxv2i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s16>) + ; RV32I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: zext_nxv2i64_nxv2i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s16>) + ; RV64I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(<vscale x 2 x s16>) = COPY $v8 + %1:_(<vscale x 2 x s64>) = G_ZEXT %0(<vscale x 2 x s16>) + $v8m2 = COPY %1(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 + +... +--- +name: zext_nxv4i32_nxv4i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv4i32_nxv4i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_ZEXT [[COPY]](<vscale x 4 x s16>) + ; RV32I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 4 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: zext_nxv4i32_nxv4i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_ZEXT [[COPY]](<vscale x 4 x s16>) + ; RV64I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 4 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(<vscale x 4 x s16>) = COPY $v8 + %1:_(<vscale x 4 x s32>) = G_ZEXT %0(<vscale x 4 x s16>) + $v8m2 = COPY %1(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 + +... 
+--- +name: zext_nxv4i64_nxv4i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv4i64_nxv4i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_ZEXT [[COPY]](<vscale x 4 x s16>) + ; RV32I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 4 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: zext_nxv4i64_nxv4i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_ZEXT [[COPY]](<vscale x 4 x s16>) + ; RV64I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 4 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_(<vscale x 4 x s16>) = COPY $v8 + %1:_(<vscale x 4 x s64>) = G_ZEXT %0(<vscale x 4 x s16>) + $v8m4 = COPY %1(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 + +... +--- +name: zext_nxv8i32_nxv8i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv8i32_nxv8i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m2 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_ZEXT [[COPY]](<vscale x 8 x s16>) + ; RV32I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 8 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: zext_nxv8i32_nxv8i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m2 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_ZEXT [[COPY]](<vscale x 8 x s16>) + ; RV64I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 8 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_(<vscale x 8 x s16>) = COPY $v8m2 + %1:_(<vscale x 8 x s32>) = G_ZEXT %0(<vscale x 8 x s16>) + $v8m4 = COPY %1(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 + +... 
+--- +name: zext_nxv8i64_nxv8i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv8i64_nxv8i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m4 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_ZEXT [[COPY]](<vscale x 8 x s16>) + ; RV32I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 8 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: zext_nxv8i64_nxv8i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m4 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_ZEXT [[COPY]](<vscale x 8 x s16>) + ; RV64I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 8 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_(<vscale x 8 x s16>) = COPY $v8m4 + %1:_(<vscale x 8 x s64>) = G_ZEXT %0(<vscale x 8 x s16>) + $v8m8 = COPY %1(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 + +... +--- +name: zext_nxv16i32_nxv16i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv16i32_nxv16i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s16>) = COPY $v8m4 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_ZEXT [[COPY]](<vscale x 16 x s16>) + ; RV32I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 16 x s32>) + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: zext_nxv16i32_nxv16i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s16>) = COPY $v8m4 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_ZEXT [[COPY]](<vscale x 16 x s16>) + ; RV64I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 16 x s32>) + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_(<vscale x 16 x s16>) = COPY $v8m4 + %1:_(<vscale x 16 x s32>) = G_ZEXT %0(<vscale x 16 x s16>) + $v8m8 = COPY %1(<vscale x 16 x s32>) + PseudoRET 
implicit $v8m8 + +... +--- +name: zext_nxv1i64_nxv1i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv1i64_nxv1i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s32>) = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s32>) + ; RV32I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv1i64_nxv1i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s32>) = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s32>) + ; RV64I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 1 x s32>) = COPY $v8 + %1:_(<vscale x 1 x s64>) = G_ZEXT %0(<vscale x 1 x s32>) + $v8 = COPY %1(<vscale x 1 x s64>) + PseudoRET implicit $v8 + +... +--- +name: zext_nxv2i64_nxv2i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv2i64_nxv2i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s32>) = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s32>) + ; RV32I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: zext_nxv2i64_nxv2i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s32>) = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s32>) + ; RV64I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_(<vscale x 2 x s32>) = COPY $v8 + %1:_(<vscale x 2 x s64>) = G_ZEXT %0(<vscale x 2 x s32>) + $v8m2 = COPY %1(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 + +... 
+--- +name: zext_nxv4i64_nxv4i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv4i64_nxv4i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s32>) = COPY $v8m2 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_ZEXT [[COPY]](<vscale x 4 x s32>) + ; RV32I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 4 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: zext_nxv4i64_nxv4i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s32>) = COPY $v8m2 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_ZEXT [[COPY]](<vscale x 4 x s32>) + ; RV64I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 4 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_(<vscale x 4 x s32>) = COPY $v8m2 + %1:_(<vscale x 4 x s64>) = G_ZEXT %0(<vscale x 4 x s32>) + $v8m4 = COPY %1(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 + +... +--- +name: zext_nxv8i64_nxv8i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv8i64_nxv8i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s32>) = COPY $v8m4 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_ZEXT [[COPY]](<vscale x 8 x s32>) + ; RV32I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 8 x s64>) + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: zext_nxv8i64_nxv8i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s32>) = COPY $v8m4 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_ZEXT [[COPY]](<vscale x 8 x s32>) + ; RV64I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 8 x s64>) + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_(<vscale x 8 x s32>) = COPY $v8m4 + %1:_(<vscale x 8 x s64>) = G_ZEXT %0(<vscale x 8 x s32>) + $v8m8 = COPY %1(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 + +... 
diff --git a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll index bafa92e..65d0768 100644 --- a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll +++ b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll @@ -18,14 +18,12 @@ define i32 @ctz_nxv4i32(<vscale x 4 x i32> %a) #0 { ; RV32-NEXT: vmsne.vi v0, v8, 0 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vmerge.vim v8, v8, -1, v0 -; RV32-NEXT: vand.vv v8, v11, v8 +; RV32-NEXT: vmerge.vvm v8, v8, v11, v0 ; RV32-NEXT: vredmaxu.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a1, a1, -1 -; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: slli a0, a0, 16 +; RV32-NEXT: srli a0, a0, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: ctz_nxv4i32: @@ -41,14 +39,12 @@ define i32 @ctz_nxv4i32(<vscale x 4 x i32> %a) #0 { ; RV64-NEXT: vmsne.vi v0, v8, 0 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV64-NEXT: vmv.v.i v8, 0 -; RV64-NEXT: vmerge.vim v8, v8, -1, v0 -; RV64-NEXT: vand.vv v8, v11, v8 +; RV64-NEXT: vmerge.vvm v8, v8, v11, v0 ; RV64-NEXT: vredmaxu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: lui a1, 16 -; RV64-NEXT: addiw a1, a1, -1 -; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: subw a0, a0, a1 +; RV64-NEXT: slli a0, a0, 48 +; RV64-NEXT: srli a0, a0, 48 ; RV64-NEXT: ret %res = call i32 @llvm.experimental.cttz.elts.i32.nxv4i32(<vscale x 4 x i32> %a, i1 0) ret i32 %res @@ -158,8 +154,7 @@ define i32 @ctz_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) { ; RV64-NEXT: li a1, -1 ; RV64-NEXT: vmadd.vx v16, a1, v8 ; RV64-NEXT: vmv.v.i v8, 0 -; RV64-NEXT: vmerge.vim v8, v8, -1, v0 -; RV64-NEXT: vand.vv v8, v16, v8 +; RV64-NEXT: vmerge.vvm v8, v8, v16, v0 ; RV64-NEXT: vredmaxu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a1, v8 ; RV64-NEXT: subw a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll index f5305a1..83d1d1b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll @@ -19,10 +19,9 @@ define <4 x i64> @vwsll_vv_v4i64_sext(<4 x i32> %a, <4 x i32> %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vv_v4i64_sext: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %x = zext <4 x i32> %a to <4 x i64> %y = sext <4 x i32> %b to <4 x i64> @@ -41,10 +40,9 @@ define <4 x i64> @vwsll_vv_v4i64_zext(<4 x i32> %a, <4 x i32> %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vv_v4i64_zext: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %x = zext <4 x i32> %a to <4 x i64> %y = zext <4 x i32> %b to <4 x i64> @@ -62,9 +60,9 @@ define <4 x i64> @vwsll_vx_i64_v4i64(<4 x i32> %a, i64 %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vx_i64_v4i64: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsll.vx v8, v10, a0 +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <4 x i64> poison, i64 %b, i32 0 %splat = shufflevector <4 x i64> %head, <4 x i64> poison, <4 x i32> zeroinitializer @@ -88,10 +86,8 @@ define <4 x i64> @vwsll_vx_i32_v4i64_sext(<4 x i32> %a, i32 %b) { ; CHECK-ZVBB: # %bb.0: ; CHECK-ZVBB-NEXT: 
vsetivli zero, 4, e32, m1, ta, ma ; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <4 x i32> poison, i32 %b, i32 0 %splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer @@ -116,10 +112,8 @@ define <4 x i64> @vwsll_vx_i32_v4i64_zext(<4 x i32> %a, i32 %b) { ; CHECK-ZVBB: # %bb.0: ; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <4 x i32> poison, i32 %b, i32 0 %splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer @@ -142,12 +136,9 @@ define <4 x i64> @vwsll_vx_i16_v4i64_sext(<4 x i32> %a, i16 %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vx_i16_v4i64_sext: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsext.vf4 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <4 x i16> poison, i16 %b, i32 0 %splat = shufflevector <4 x i16> %head, <4 x i16> poison, <4 x i32> zeroinitializer @@ -170,12 +161,9 @@ define <4 x i64> @vwsll_vx_i16_v4i64_zext(<4 x i32> %a, i16 %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vx_i16_v4i64_zext: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 4, e16, 
mf2, ta, ma -; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vzext.vf4 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <4 x i16> poison, i16 %b, i32 0 %splat = shufflevector <4 x i16> %head, <4 x i16> poison, <4 x i32> zeroinitializer @@ -198,12 +186,9 @@ define <4 x i64> @vwsll_vx_i8_v4i64_sext(<4 x i32> %a, i8 %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vx_i8_v4i64_sext: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsext.vf8 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <4 x i8> poison, i8 %b, i32 0 %splat = shufflevector <4 x i8> %head, <4 x i8> poison, <4 x i32> zeroinitializer @@ -226,12 +211,9 @@ define <4 x i64> @vwsll_vx_i8_v4i64_zext(<4 x i32> %a, i8 %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vx_i8_v4i64_zext: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vzext.vf8 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <4 x i8> poison, i8 %b, i32 0 %splat = shufflevector <4 x i8> %head, <4 x i8> poison, <4 x i32> zeroinitializer @@ -251,9 +233,9 @@ define <4 x i64> @vwsll_vi_v4i64(<4 x i32> %a) { ; ; 
CHECK-ZVBB-LABEL: vwsll_vi_v4i64: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsll.vi v8, v10, 2 +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vi v10, v8, 2 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %x = zext <4 x i32> %a to <4 x i64> %z = shl <4 x i64> %x, splat (i64 2) @@ -275,10 +257,9 @@ define <8 x i32> @vwsll_vv_v8i32_sext(<8 x i16> %a, <8 x i16> %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vv_v8i32_sext: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %x = zext <8 x i16> %a to <8 x i32> %y = sext <8 x i16> %b to <8 x i32> @@ -297,10 +278,9 @@ define <8 x i32> @vwsll_vv_v8i32_zext(<8 x i16> %a, <8 x i16> %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vv_v8i32_zext: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %x = zext <8 x i16> %a to <8 x i32> %y = zext <8 x i16> %b to <8 x i32> @@ -318,9 +298,9 @@ define <8 x i32> @vwsll_vx_i64_v8i32(<8 x i16> %a, i64 %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vx_i64_v8i32: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsll.vx v8, v10, a0 +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <8 x i64> poison, i64 %b, 
i32 0 %splat = shufflevector <8 x i64> %head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -340,9 +320,9 @@ define <8 x i32> @vwsll_vx_i32_v8i32(<8 x i16> %a, i32 %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vx_i32_v8i32: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsll.vx v8, v10, a0 +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <8 x i32> poison, i32 %b, i32 0 %splat = shufflevector <8 x i32> %head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -366,10 +346,8 @@ define <8 x i32> @vwsll_vx_i16_v8i32_sext(<8 x i16> %a, i16 %b) { ; CHECK-ZVBB: # %bb.0: ; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <8 x i16> poison, i16 %b, i32 0 %splat = shufflevector <8 x i16> %head, <8 x i16> poison, <8 x i32> zeroinitializer @@ -394,10 +372,8 @@ define <8 x i32> @vwsll_vx_i16_v8i32_zext(<8 x i16> %a, i16 %b) { ; CHECK-ZVBB: # %bb.0: ; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <8 x i16> poison, i16 %b, i32 0 %splat = shufflevector <8 x i16> %head, <8 x i16> poison, <8 x i32> zeroinitializer @@ -420,12 +396,9 @@ define <8 x i32> @vwsll_vx_i8_v8i32_sext(<8 x i16> %a, i8 %b) { ; ; CHECK-ZVBB-LABEL: 
vwsll_vx_i8_v8i32_sext: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsext.vf4 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <8 x i8> poison, i8 %b, i32 0 %splat = shufflevector <8 x i8> %head, <8 x i8> poison, <8 x i32> zeroinitializer @@ -448,12 +421,9 @@ define <8 x i32> @vwsll_vx_i8_v8i32_zext(<8 x i16> %a, i8 %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vx_i8_v8i32_zext: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vzext.vf4 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <8 x i8> poison, i8 %b, i32 0 %splat = shufflevector <8 x i8> %head, <8 x i8> poison, <8 x i32> zeroinitializer @@ -473,9 +443,9 @@ define <8 x i32> @vwsll_vi_v8i32(<8 x i16> %a) { ; ; CHECK-ZVBB-LABEL: vwsll_vi_v8i32: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsll.vi v8, v10, 2 +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vi v10, v8, 2 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %x = zext <8 x i16> %a to <8 x i32> %z = shl <8 x i32> %x, splat (i32 2) @@ -497,10 +467,9 @@ define <16 x i16> @vwsll_vv_v16i16_sext(<16 x i8> %a, <16 x i8> %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vv_v16i16_sext: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, 
ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %x = zext <16 x i8> %a to <16 x i16> %y = sext <16 x i8> %b to <16 x i16> @@ -519,10 +488,9 @@ define <16 x i16> @vwsll_vv_v16i16_zext(<16 x i8> %a, <16 x i8> %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vv_v16i16_zext: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %x = zext <16 x i8> %a to <16 x i16> %y = zext <16 x i8> %b to <16 x i16> @@ -552,12 +520,9 @@ define <16 x i16> @vwsll_vx_i32_v16i16(<16 x i8> %a, i32 %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vx_i32_v16i16: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-ZVBB-NEXT: vmv.v.x v12, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vnsrl.wi v8, v12, 0 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v8 +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <16 x i32> poison, i32 %b, i32 0 %splat = shufflevector <16 x i32> %head, <16 x i32> poison, <16 x i32> zeroinitializer @@ -577,9 +542,9 @@ define <16 x i16> @vwsll_vx_i16_v16i16(<16 x i8> %a, i16 %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vx_i16_v16i16: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsll.vx v8, v10, a0 +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0 +; 
CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <16 x i16> poison, i16 %b, i32 0 %splat = shufflevector <16 x i16> %head, <16 x i16> poison, <16 x i32> zeroinitializer @@ -603,10 +568,8 @@ define <16 x i16> @vwsll_vx_i8_v16i16_sext(<16 x i8> %a, i8 %b) { ; CHECK-ZVBB: # %bb.0: ; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <16 x i8> poison, i8 %b, i32 0 %splat = shufflevector <16 x i8> %head, <16 x i8> poison, <16 x i32> zeroinitializer @@ -631,10 +594,8 @@ define <16 x i16> @vwsll_vx_i8_v16i16_zext(<16 x i8> %a, i8 %b) { ; CHECK-ZVBB: # %bb.0: ; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <16 x i8> poison, i8 %b, i32 0 %splat = shufflevector <16 x i8> %head, <16 x i8> poison, <16 x i32> zeroinitializer @@ -654,9 +615,9 @@ define <16 x i16> @vwsll_vi_v16i16(<16 x i8> %a) { ; ; CHECK-ZVBB-LABEL: vwsll_vi_v16i16: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsll.vi v8, v10, 2 +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vi v10, v8, 2 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %x = zext <16 x i8> %a to <16 x i16> %z = shl <16 x i16> %x, splat (i16 2) diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-binop-into-select.ll 
b/llvm/test/CodeGen/RISCV/rvv/fold-binop-into-select.ll new file mode 100644 index 0000000..3a8d08f --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fold-binop-into-select.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s + +; The following binop x, (zext i1) tests will be vector-legalized into a vselect +; of two splat_vectors, but on RV64 the splat value will be implicitly +; truncated: +; +; t15: nxv2i32 = splat_vector Constant:i64<1> +; t13: nxv2i32 = splat_vector Constant:i64<0> +; t16: nxv2i32 = vselect t2, t15, t13 +; t7: nxv2i32 = add t4, t16 +; +; Make sure that foldSelectWithIdentityConstant in DAGCombiner.cpp handles the +; truncating splat, so we pull the vselect back and fold it into a mask. + +define <vscale x 2 x i32> @i1_zext_add(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b) { +; CHECK-LABEL: i1_zext_add: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t +; CHECK-NEXT: ret + %zext = zext <vscale x 2 x i1> %a to <vscale x 2 x i32> + %add = add <vscale x 2 x i32> %b, %zext + ret <vscale x 2 x i32> %add +} + +define <vscale x 2 x i32> @i1_zext_add_commuted(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b) { +; CHECK-LABEL: i1_zext_add_commuted: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t +; CHECK-NEXT: ret + %zext = zext <vscale x 2 x i1> %a to <vscale x 2 x i32> + %add = add <vscale x 2 x i32> %zext, %b + ret <vscale x 2 x i32> %add +} + +define <vscale x 2 x i32> @i1_zext_sub(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b) { +; CHECK-LABEL: i1_zext_sub: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %zext = 
zext <vscale x 2 x i1> %a to <vscale x 2 x i32> + %sub = sub <vscale x 2 x i32> %b, %zext + ret <vscale x 2 x i32> %sub +} + +define <vscale x 2 x i32> @i1_zext_or(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b) { +; CHECK-LABEL: i1_zext_or: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-NEXT: vor.vi v8, v8, 1, v0.t +; CHECK-NEXT: ret + %zext = zext <vscale x 2 x i1> %a to <vscale x 2 x i32> + %or = or <vscale x 2 x i32> %b, %zext + ret <vscale x 2 x i32> %or +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll index e56dca0..a14ce71 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll @@ -149,49 +149,49 @@ define <vscale x 2 x i64> @vwop_vscale_sext_i32i64_multiple_users(ptr %x, ptr %y } define <vscale x 2 x i32> @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y, ptr %z) { -; RV32-LABEL: vwop_vscale_sext_i1i32_multiple_users: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a3, zero, e32, m1, ta, mu -; RV32-NEXT: vlm.v v8, (a0) -; RV32-NEXT: vlm.v v9, (a1) -; RV32-NEXT: vlm.v v10, (a2) -; RV32-NEXT: vmv.v.i v11, 0 -; RV32-NEXT: vmv.v.v v0, v8 -; RV32-NEXT: vmerge.vim v12, v11, -1, v0 -; RV32-NEXT: vmv.v.v v0, v9 -; RV32-NEXT: vmerge.vim v9, v11, -1, v0 -; RV32-NEXT: vmv.v.v v0, v10 -; RV32-NEXT: vmerge.vim v10, v11, -1, v0 -; RV32-NEXT: vmul.vv v9, v12, v9 -; RV32-NEXT: li a0, 1 -; RV32-NEXT: vsub.vv v11, v12, v10 -; RV32-NEXT: vmv.v.v v0, v8 -; RV32-NEXT: vsub.vx v10, v10, a0, v0.t -; RV32-NEXT: vor.vv v8, v9, v10 -; RV32-NEXT: vor.vv v8, v8, v11 -; RV32-NEXT: ret +; NO_FOLDING-LABEL: vwop_vscale_sext_i1i32_multiple_users: +; NO_FOLDING: # %bb.0: +; NO_FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu +; NO_FOLDING-NEXT: vlm.v v8, (a0) +; NO_FOLDING-NEXT: vlm.v v9, (a1) +; NO_FOLDING-NEXT: vlm.v v10, (a2) +; NO_FOLDING-NEXT: vmv.v.i v11, 0 +; NO_FOLDING-NEXT: vmv.v.v v0, v8 
+; NO_FOLDING-NEXT: vmerge.vim v12, v11, -1, v0 +; NO_FOLDING-NEXT: vmv.v.v v0, v9 +; NO_FOLDING-NEXT: vmerge.vim v9, v11, -1, v0 +; NO_FOLDING-NEXT: vmv.v.v v0, v10 +; NO_FOLDING-NEXT: vmerge.vim v10, v11, -1, v0 +; NO_FOLDING-NEXT: vmul.vv v9, v12, v9 +; NO_FOLDING-NEXT: li a0, 1 +; NO_FOLDING-NEXT: vsub.vv v11, v12, v10 +; NO_FOLDING-NEXT: vmv.v.v v0, v8 +; NO_FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t +; NO_FOLDING-NEXT: vor.vv v8, v9, v10 +; NO_FOLDING-NEXT: vor.vv v8, v8, v11 +; NO_FOLDING-NEXT: ret ; -; RV64-LABEL: vwop_vscale_sext_i1i32_multiple_users: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a3, zero, e32, m1, ta, ma -; RV64-NEXT: vlm.v v8, (a0) -; RV64-NEXT: vlm.v v9, (a1) -; RV64-NEXT: vlm.v v10, (a2) -; RV64-NEXT: vmv.v.i v11, 0 -; RV64-NEXT: vmv.v.v v0, v8 -; RV64-NEXT: vmerge.vim v12, v11, -1, v0 -; RV64-NEXT: vmv.v.v v0, v9 -; RV64-NEXT: vmerge.vim v9, v11, -1, v0 -; RV64-NEXT: vmv.v.v v0, v10 -; RV64-NEXT: vmerge.vim v10, v11, -1, v0 -; RV64-NEXT: vmul.vv v9, v12, v9 -; RV64-NEXT: vmv.v.v v0, v8 -; RV64-NEXT: vmerge.vim v8, v11, 1, v0 -; RV64-NEXT: vsub.vv v8, v10, v8 -; RV64-NEXT: vsub.vv v10, v12, v10 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: ret +; FOLDING-LABEL: vwop_vscale_sext_i1i32_multiple_users: +; FOLDING: # %bb.0: +; FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu +; FOLDING-NEXT: vlm.v v8, (a0) +; FOLDING-NEXT: vlm.v v9, (a1) +; FOLDING-NEXT: vlm.v v10, (a2) +; FOLDING-NEXT: vmv.v.i v11, 0 +; FOLDING-NEXT: vmv.v.v v0, v8 +; FOLDING-NEXT: vmerge.vim v12, v11, -1, v0 +; FOLDING-NEXT: vmv.v.v v0, v9 +; FOLDING-NEXT: vmerge.vim v9, v11, -1, v0 +; FOLDING-NEXT: vmv.v.v v0, v10 +; FOLDING-NEXT: vmerge.vim v10, v11, -1, v0 +; FOLDING-NEXT: vmul.vv v9, v12, v9 +; FOLDING-NEXT: li a0, 1 +; FOLDING-NEXT: vsub.vv v11, v12, v10 +; FOLDING-NEXT: vmv.v.v v0, v8 +; FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t +; FOLDING-NEXT: vor.vv v8, v9, v10 +; FOLDING-NEXT: vor.vv v8, v8, v11 +; FOLDING-NEXT: ret %a = load <vscale x 2 
x i1>, ptr %x %b = load <vscale x 2 x i1>, ptr %y %b2 = load <vscale x 2 x i1>, ptr %z @@ -209,7 +209,7 @@ define <vscale x 2 x i32> @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y, define <vscale x 2 x i8> @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, ptr %z) { ; NO_FOLDING-LABEL: vwop_vscale_sext_i1i8_multiple_users: ; NO_FOLDING: # %bb.0: -; NO_FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; NO_FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu ; NO_FOLDING-NEXT: vlm.v v8, (a0) ; NO_FOLDING-NEXT: vlm.v v9, (a1) ; NO_FOLDING-NEXT: vlm.v v10, (a2) @@ -221,17 +221,17 @@ define <vscale x 2 x i8> @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, p ; NO_FOLDING-NEXT: vmv1r.v v0, v10 ; NO_FOLDING-NEXT: vmerge.vim v10, v11, -1, v0 ; NO_FOLDING-NEXT: vmul.vv v9, v12, v9 +; NO_FOLDING-NEXT: li a0, 1 +; NO_FOLDING-NEXT: vsub.vv v11, v12, v10 ; NO_FOLDING-NEXT: vmv1r.v v0, v8 -; NO_FOLDING-NEXT: vmerge.vim v8, v11, 1, v0 -; NO_FOLDING-NEXT: vsub.vv v8, v10, v8 -; NO_FOLDING-NEXT: vsub.vv v10, v12, v10 -; NO_FOLDING-NEXT: vor.vv v8, v9, v8 -; NO_FOLDING-NEXT: vor.vv v8, v8, v10 +; NO_FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t +; NO_FOLDING-NEXT: vor.vv v8, v9, v10 +; NO_FOLDING-NEXT: vor.vv v8, v8, v11 ; NO_FOLDING-NEXT: ret ; ; FOLDING-LABEL: vwop_vscale_sext_i1i8_multiple_users: ; FOLDING: # %bb.0: -; FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu ; FOLDING-NEXT: vlm.v v8, (a0) ; FOLDING-NEXT: vlm.v v9, (a1) ; FOLDING-NEXT: vlm.v v10, (a2) @@ -243,12 +243,12 @@ define <vscale x 2 x i8> @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, p ; FOLDING-NEXT: vmv1r.v v0, v10 ; FOLDING-NEXT: vmerge.vim v10, v11, -1, v0 ; FOLDING-NEXT: vmul.vv v9, v12, v9 +; FOLDING-NEXT: li a0, 1 +; FOLDING-NEXT: vsub.vv v11, v12, v10 ; FOLDING-NEXT: vmv1r.v v0, v8 -; FOLDING-NEXT: vmerge.vim v8, v11, 1, v0 -; FOLDING-NEXT: vsub.vv v8, v10, v8 -; FOLDING-NEXT: vsub.vv v10, v12, v10 -; FOLDING-NEXT: vor.vv v8, v9, v8 -; 
FOLDING-NEXT: vor.vv v8, v8, v10 +; FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t +; FOLDING-NEXT: vor.vv v8, v9, v10 +; FOLDING-NEXT: vor.vv v8, v8, v11 ; FOLDING-NEXT: ret %a = load <vscale x 2 x i1>, ptr %x %b = load <vscale x 2 x i1>, ptr %y @@ -444,41 +444,39 @@ define <vscale x 2 x i64> @vwop_vscale_zext_i32i64_multiple_users(ptr %x, ptr %y } define <vscale x 2 x i32> @vwop_vscale_zext_i1i32_multiple_users(ptr %x, ptr %y, ptr %z) { -; RV32-LABEL: vwop_vscale_zext_i1i32_multiple_users: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a3, zero, e32, m1, ta, mu -; RV32-NEXT: vlm.v v0, (a0) -; RV32-NEXT: vlm.v v8, (a2) -; RV32-NEXT: vlm.v v9, (a1) -; RV32-NEXT: vmv.v.i v10, 0 -; RV32-NEXT: vmerge.vim v11, v10, 1, v0 -; RV32-NEXT: vmv.v.v v0, v8 -; RV32-NEXT: vmerge.vim v8, v10, 1, v0 -; RV32-NEXT: vadd.vv v10, v11, v8 -; RV32-NEXT: vsub.vv v8, v11, v8 -; RV32-NEXT: vmv.v.v v0, v9 -; RV32-NEXT: vor.vv v10, v10, v11, v0.t -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: ret +; NO_FOLDING-LABEL: vwop_vscale_zext_i1i32_multiple_users: +; NO_FOLDING: # %bb.0: +; NO_FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu +; NO_FOLDING-NEXT: vlm.v v0, (a0) +; NO_FOLDING-NEXT: vlm.v v8, (a2) +; NO_FOLDING-NEXT: vlm.v v9, (a1) +; NO_FOLDING-NEXT: vmv.v.i v10, 0 +; NO_FOLDING-NEXT: vmerge.vim v11, v10, 1, v0 +; NO_FOLDING-NEXT: vmv.v.v v0, v8 +; NO_FOLDING-NEXT: vmerge.vim v8, v10, 1, v0 +; NO_FOLDING-NEXT: vadd.vv v10, v11, v8 +; NO_FOLDING-NEXT: vsub.vv v8, v11, v8 +; NO_FOLDING-NEXT: vmv.v.v v0, v9 +; NO_FOLDING-NEXT: vor.vv v10, v10, v11, v0.t +; NO_FOLDING-NEXT: vor.vv v8, v10, v8 +; NO_FOLDING-NEXT: ret ; -; RV64-LABEL: vwop_vscale_zext_i1i32_multiple_users: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a3, zero, e32, m1, ta, ma -; RV64-NEXT: vlm.v v0, (a0) -; RV64-NEXT: vlm.v v8, (a1) -; RV64-NEXT: vlm.v v9, (a2) -; RV64-NEXT: vmv.v.i v10, 0 -; RV64-NEXT: vmerge.vim v11, v10, 1, v0 -; RV64-NEXT: vmv.v.v v0, v8 -; RV64-NEXT: vmerge.vim v8, v10, 1, v0 -; RV64-NEXT: vmv.v.v v0, v9 -; RV64-NEXT: 
vmerge.vim v9, v10, 1, v0 -; RV64-NEXT: vmul.vv v8, v11, v8 -; RV64-NEXT: vadd.vv v10, v11, v9 -; RV64-NEXT: vsub.vv v9, v11, v9 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: ret +; FOLDING-LABEL: vwop_vscale_zext_i1i32_multiple_users: +; FOLDING: # %bb.0: +; FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu +; FOLDING-NEXT: vlm.v v0, (a0) +; FOLDING-NEXT: vlm.v v8, (a2) +; FOLDING-NEXT: vlm.v v9, (a1) +; FOLDING-NEXT: vmv.v.i v10, 0 +; FOLDING-NEXT: vmerge.vim v11, v10, 1, v0 +; FOLDING-NEXT: vmv.v.v v0, v8 +; FOLDING-NEXT: vmerge.vim v8, v10, 1, v0 +; FOLDING-NEXT: vadd.vv v10, v11, v8 +; FOLDING-NEXT: vsub.vv v8, v11, v8 +; FOLDING-NEXT: vmv.v.v v0, v9 +; FOLDING-NEXT: vor.vv v10, v10, v11, v0.t +; FOLDING-NEXT: vor.vv v8, v10, v8 +; FOLDING-NEXT: ret %a = load <vscale x 2 x i1>, ptr %x %b = load <vscale x 2 x i1>, ptr %y %b2 = load <vscale x 2 x i1>, ptr %z @@ -496,40 +494,36 @@ define <vscale x 2 x i32> @vwop_vscale_zext_i1i32_multiple_users(ptr %x, ptr %y, define <vscale x 2 x i8> @vwop_vscale_zext_i1i8_multiple_users(ptr %x, ptr %y, ptr %z) { ; NO_FOLDING-LABEL: vwop_vscale_zext_i1i8_multiple_users: ; NO_FOLDING: # %bb.0: -; NO_FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; NO_FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu ; NO_FOLDING-NEXT: vlm.v v0, (a0) -; NO_FOLDING-NEXT: vlm.v v8, (a1) -; NO_FOLDING-NEXT: vlm.v v9, (a2) +; NO_FOLDING-NEXT: vlm.v v8, (a2) +; NO_FOLDING-NEXT: vlm.v v9, (a1) ; NO_FOLDING-NEXT: vmv.v.i v10, 0 ; NO_FOLDING-NEXT: vmerge.vim v11, v10, 1, v0 ; NO_FOLDING-NEXT: vmv1r.v v0, v8 ; NO_FOLDING-NEXT: vmerge.vim v8, v10, 1, v0 +; NO_FOLDING-NEXT: vadd.vv v10, v11, v8 +; NO_FOLDING-NEXT: vsub.vv v8, v11, v8 ; NO_FOLDING-NEXT: vmv1r.v v0, v9 -; NO_FOLDING-NEXT: vmerge.vim v9, v10, 1, v0 -; NO_FOLDING-NEXT: vmul.vv v8, v11, v8 -; NO_FOLDING-NEXT: vadd.vv v10, v11, v9 -; NO_FOLDING-NEXT: vsub.vv v9, v11, v9 -; NO_FOLDING-NEXT: vor.vv v8, v8, v10 -; NO_FOLDING-NEXT: vor.vv v8, v8, v9 +; NO_FOLDING-NEXT: 
vor.vv v10, v10, v11, v0.t +; NO_FOLDING-NEXT: vor.vv v8, v10, v8 ; NO_FOLDING-NEXT: ret ; ; FOLDING-LABEL: vwop_vscale_zext_i1i8_multiple_users: ; FOLDING: # %bb.0: -; FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu ; FOLDING-NEXT: vlm.v v0, (a0) -; FOLDING-NEXT: vlm.v v8, (a1) -; FOLDING-NEXT: vlm.v v9, (a2) +; FOLDING-NEXT: vlm.v v8, (a2) +; FOLDING-NEXT: vlm.v v9, (a1) ; FOLDING-NEXT: vmv.v.i v10, 0 ; FOLDING-NEXT: vmerge.vim v11, v10, 1, v0 ; FOLDING-NEXT: vmv1r.v v0, v8 ; FOLDING-NEXT: vmerge.vim v8, v10, 1, v0 +; FOLDING-NEXT: vadd.vv v10, v11, v8 +; FOLDING-NEXT: vsub.vv v8, v11, v8 ; FOLDING-NEXT: vmv1r.v v0, v9 -; FOLDING-NEXT: vmerge.vim v9, v10, 1, v0 -; FOLDING-NEXT: vmul.vv v8, v11, v8 -; FOLDING-NEXT: vadd.vv v10, v11, v9 -; FOLDING-NEXT: vsub.vv v9, v11, v9 -; FOLDING-NEXT: vor.vv v8, v8, v10 -; FOLDING-NEXT: vor.vv v8, v8, v9 +; FOLDING-NEXT: vor.vv v10, v10, v11, v0.t +; FOLDING-NEXT: vor.vv v8, v10, v8 ; FOLDING-NEXT: ret %a = load <vscale x 2 x i1>, ptr %x %b = load <vscale x 2 x i1>, ptr %y @@ -594,3 +588,6 @@ define <vscale x 2 x i32> @vwop_vscale_zext_i8i32_multiple_users(ptr %x, ptr %y, +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll index 0d52dd7..0a5e501 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll @@ -825,3 +825,56 @@ define <vscale x 2 x i1> @select_cond_x_cond(<vscale x 2 x i1> %x, <vscale x 2 x %a = call <vscale x 2 x i1> @llvm.vp.select.nxv2i1(<vscale x 2 x i1> %x, <vscale x 2 x i1> %y, <vscale x 2 x i1> %x, i32 %evl) ret <vscale x 2 x i1> %a } + +define <vscale x 2 x i1> @select_undef_T_F(<vscale x 2 x i1> %x, <vscale x 2 x i1> %y, i32 zeroext %evl) { +; CHECK-LABEL: select_undef_T_F: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: ret + %a = call <vscale x 2 x i1> @llvm.vp.select.nxv2i1(<vscale x 2 x i1> poison, <vscale x 2 x i1> %x, <vscale x 2 x i1> %y, i32 %evl) + ret <vscale x 2 x i1> %a +} + +define <vscale x 2 x i1> @select_undef_undef_F(<vscale x 2 x i1> %x, i32 zeroext %evl) { +; CHECK-LABEL: select_undef_undef_F: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %a = call <vscale x 2 x i1> @llvm.vp.select.nxv2i1(<vscale x 2 x i1> poison, <vscale x 2 x i1> undef, <vscale x 2 x i1> %x, i32 %evl) + ret <vscale x 2 x i1> %a +} + +define <vscale x 2 x i1> @select_unknown_undef_F(<vscale x 2 x i1> %x, <vscale x 2 x i1> %y, i32 zeroext %evl) { +; CHECK-LABEL: select_unknown_undef_F: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: ret + %a = call <vscale x 2 x i1> @llvm.vp.select.nxv2i1(<vscale x 2 x i1> %x, <vscale x 2 x i1> undef, <vscale x 2 x i1> %y, i32 %evl) + ret <vscale x 2 x i1> %a +} + +define <vscale x 2 x i1> @select_unknown_T_undef(<vscale x 2 x i1> %x, <vscale x 2 x i1> %y, i32 zeroext %evl) { +; CHECK-LABEL: select_unknown_T_undef: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: ret + %a = call <vscale x 2 x i1> @llvm.vp.select.nxv2i1(<vscale x 2 x i1> %x, <vscale x 2 x i1> %y, <vscale x 2 x i1> poison, i32 %evl) 
+ ret <vscale x 2 x i1> %a +} + +define <vscale x 2 x i1> @select_false_T_F(<vscale x 2 x i1> %x, <vscale x 2 x i1> %y, <vscale x 2 x i1> %z, i32 zeroext %evl) { +; CHECK-LABEL: select_false_T_F: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: ret + %a = call <vscale x 2 x i1> @llvm.vp.select.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> %y, <vscale x 2 x i1> %z, i32 %evl) + ret <vscale x 2 x i1> %a +} + +define <vscale x 2 x i1> @select_unknown_T_T(<vscale x 2 x i1> %x, <vscale x 2 x i1> %y, i32 zeroext %evl) { +; CHECK-LABEL: select_unknown_T_T: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: ret + %a = call <vscale x 2 x i1> @llvm.vp.select.nxv2i1(<vscale x 2 x i1> %x, <vscale x 2 x i1> %y, <vscale x 2 x i1> %y, i32 %evl) + ret <vscale x 2 x i1> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll index 770bb56..082de2e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll @@ -627,3 +627,259 @@ define <vscale x 8 x i16> @vwsll_vi_nxv8i16(<vscale x 8 x i8> %a) { %z = shl <vscale x 8 x i16> %x, splat (i16 2) ret <vscale x 8 x i16> %z } + +; ============================================================================== +; i8 -> i64 +; ============================================================================== + +define <vscale x 2 x i64> @vwsll_vv_nxv2i64_nxv2i8_sext(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b) { +; CHECK-LABEL: vwsll_vv_nxv2i64_nxv2i8_sext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vsext.vf8 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vv_nxv2i64_nxv2i8_sext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vsext.vf8 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret 
+ %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64> + %y = sext <vscale x 2 x i8> %b to <vscale x 2 x i64> + %z = shl <vscale x 2 x i64> %x, %y + ret <vscale x 2 x i64> %z +} + +define <vscale x 2 x i64> @vwsll_vv_nxv2i64_nxv2i8_zext(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b) { +; CHECK-LABEL: vwsll_vv_nxv2i64_nxv2i8_zext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vzext.vf8 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vv_nxv2i64_nxv2i8_zext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vzext.vf8 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64> + %y = zext <vscale x 2 x i8> %b to <vscale x 2 x i64> + %z = shl <vscale x 2 x i64> %x, %y + ret <vscale x 2 x i64> %z +} + +define <vscale x 2 x i64> @vwsll_vx_i64_nxv2i64_nxv2i8(<vscale x 2 x i8> %a, i64 %b) { +; CHECK-LABEL: vwsll_vx_i64_nxv2i64_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vsll.vx v8, v10, a0 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i64_nxv2i64_nxv2i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vsll.vx v8, v10, a0 +; CHECK-ZVBB-NEXT: ret + %head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0 + %splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer + %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64> + %z = shl <vscale x 2 x i64> %x, %splat + ret <vscale x 2 x i64> %z +} + +define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_nxv2i8_sext(<vscale x 2 x i8> %a, i32 %b) { +; CHECK-LABEL: vwsll_vx_i32_nxv2i64_nxv2i8_sext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, 
m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vsext.vf2 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv2i64_nxv2i8_sext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0 + %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer + %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64> + %y = sext <vscale x 2 x i32> %splat to <vscale x 2 x i64> + %z = shl <vscale x 2 x i64> %x, %y + ret <vscale x 2 x i64> %z +} + +define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_nxv2i8_zext(<vscale x 2 x i8> %a, i32 %b) { +; CHECK-LABEL: vwsll_vx_i32_nxv2i64_nxv2i8_zext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vzext.vf2 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv2i64_nxv2i8_zext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0 + %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer + %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64> + %y = zext <vscale x 2 x i32> %splat to 
<vscale x 2 x i64> + %z = shl <vscale x 2 x i64> %x, %y + ret <vscale x 2 x i64> %z +} + +define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_nxv2i8_sext(<vscale x 2 x i8> %a, i16 %b) { +; CHECK-LABEL: vwsll_vx_i16_nxv2i64_nxv2i8_sext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vsext.vf4 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv2i64_nxv2i8_sext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vsext.vf4 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0 + %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64> + %y = sext <vscale x 2 x i16> %splat to <vscale x 2 x i64> + %z = shl <vscale x 2 x i64> %x, %y + ret <vscale x 2 x i64> %z +} + +define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_nxv2i8_zext(<vscale x 2 x i8> %a, i16 %b) { +; CHECK-LABEL: vwsll_vx_i16_nxv2i64_nxv2i8_zext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vzext.vf4 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv2i64_nxv2i8_zext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vzext.vf4 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; 
CHECK-ZVBB-NEXT: ret + %head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0 + %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64> + %y = zext <vscale x 2 x i16> %splat to <vscale x 2 x i64> + %z = shl <vscale x 2 x i64> %x, %y + ret <vscale x 2 x i64> %z +} + +define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_nxv2i8_sext(<vscale x 2 x i8> %a, i8 %b) { +; CHECK-LABEL: vwsll_vx_i8_nxv2i64_nxv2i8_sext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vsext.vf8 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv2i64_nxv2i8_sext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vsext.vf8 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0 + %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer + %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64> + %y = sext <vscale x 2 x i8> %splat to <vscale x 2 x i64> + %z = shl <vscale x 2 x i64> %x, %y + ret <vscale x 2 x i64> %z +} + +define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_nxv2i8_zext(<vscale x 2 x i8> %a, i8 %b) { +; CHECK-LABEL: vwsll_vx_i8_nxv2i64_nxv2i8_zext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vzext.vf8 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv2i64_nxv2i8_zext: +; 
CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vzext.vf8 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0 + %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer + %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64> + %y = zext <vscale x 2 x i8> %splat to <vscale x 2 x i64> + %z = shl <vscale x 2 x i64> %x, %y + ret <vscale x 2 x i64> %z +} + +define <vscale x 2 x i64> @vwsll_vi_nxv2i64_nxv2i8(<vscale x 2 x i8> %a) { +; CHECK-LABEL: vwsll_vi_nxv2i64_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vsll.vi v8, v10, 2 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vi_nxv2i64_nxv2i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vsll.vi v8, v10, 2 +; CHECK-ZVBB-NEXT: ret + %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64> + %z = shl <vscale x 2 x i64> %x, splat (i64 2) + ret <vscale x 2 x i64> %z +} diff --git a/llvm/test/CodeGen/SPARC/inlineasm-bad.ll b/llvm/test/CodeGen/SPARC/inlineasm-bad.ll index 5bf2adb..07eb67d 100644 --- a/llvm/test/CodeGen/SPARC/inlineasm-bad.ll +++ b/llvm/test/CodeGen/SPARC/inlineasm-bad.ll @@ -11,3 +11,12 @@ entry: tail call void asm sideeffect "faddq $0,$1,$2", "{f38},{f0},{f0}"(fp128 0xL0, fp128 0xL0, fp128 0xL0) ret void } + +; CHECK-label:test_twinword_error +; CHECK: error: Hi part of pair should point to an even-numbered register +; CHECK: error: (note that in some cases it might be necessary to manually bind the input/output registers instead of relying on automatic allocation) + +define i64 @test_twinword_error(){ + %1 = tail call i64 asm sideeffect "rd 
%asr5, ${0:L} \0A\09 srlx ${0:L}, 32, ${0:H}", "={i1}"() + ret i64 %1 +} diff --git a/llvm/test/CodeGen/SPARC/inlineasm.ll b/llvm/test/CodeGen/SPARC/inlineasm.ll index ec27598..9817d7c 100644 --- a/llvm/test/CodeGen/SPARC/inlineasm.ll +++ b/llvm/test/CodeGen/SPARC/inlineasm.ll @@ -143,3 +143,12 @@ entry: %1 = call double asm sideeffect "faddd $1, $2, $0", "=f,f,e"(i64 0, i64 0) ret void } + +; CHECK-label:test_twinword +; CHECK: rd %asr5, %i1 +; CHECK: srlx %i1, 32, %i0 + +define i64 @test_twinword(){ + %1 = tail call i64 asm sideeffect "rd %asr5, ${0:L} \0A\09 srlx ${0:L}, 32, ${0:H}", "={i0}"() + ret i64 %1 +} diff --git a/llvm/test/CodeGen/SPIRV/OpVariable_order.ll b/llvm/test/CodeGen/SPIRV/OpVariable_order.ll index a4ca3aa..6057bf38 100644 --- a/llvm/test/CodeGen/SPIRV/OpVariable_order.ll +++ b/llvm/test/CodeGen/SPIRV/OpVariable_order.ll @@ -1,10 +1,14 @@ -; REQUIRES: spirv-tools -; RUN: llc -O0 -mtriple=spirv-unknown-linux %s -o - -filetype=obj | not spirv-val 2>&1 | FileCheck %s +; All OpVariable instructions in a function must be the first instructions in the first block -; TODO(#66261): The SPIR-V backend should reorder OpVariable instructions so this doesn't fail, -; but in the meantime it's a good example of the spirv-val tool working as intended. +; RUN: llc -O0 -mtriple=spirv-unknown-linux %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-linux %s -o - -filetype=obj | spirv-val %} -; CHECK: All OpVariable instructions in a function must be the first instructions in the first block. 
+; CHECK-SPIRV: OpFunction +; CHECK-SPIRV-NEXT: OpLabel +; CHECK-SPIRV-NEXT: OpVariable +; CHECK-SPIRV-NEXT: OpVariable +; CHECK-SPIRV: OpReturn +; CHECK-SPIRV: OpFunctionEnd define void @main() #1 { entry: diff --git a/llvm/test/CodeGen/SPIRV/pointers/type-deduce-by-call-chain.ll b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-by-call-chain.ll index 1071d34..b039f80 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/type-deduce-by-call-chain.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-by-call-chain.ll @@ -10,22 +10,46 @@ ; CHECK-SPIRV-DAG: OpName %[[FooObj:.*]] "foo_object" ; CHECK-SPIRV-DAG: OpName %[[FooMemOrder:.*]] "mem_order" ; CHECK-SPIRV-DAG: OpName %[[FooFunc:.*]] "foo" + ; CHECK-SPIRV-DAG: %[[TyLong:.*]] = OpTypeInt 32 0 ; CHECK-SPIRV-DAG: %[[TyVoid:.*]] = OpTypeVoid +; CHECK-SPIRV-DAG: %[[TyGenPtrLong:.*]] = OpTypePointer Generic %[[TyLong]] ; CHECK-SPIRV-DAG: %[[TyPtrLong:.*]] = OpTypePointer CrossWorkgroup %[[TyLong]] ; CHECK-SPIRV-DAG: %[[TyFunPtrLong:.*]] = OpTypeFunction %[[TyVoid]] %[[TyPtrLong]] -; CHECK-SPIRV-DAG: %[[TyGenPtrLong:.*]] = OpTypePointer Generic %[[TyLong]] +; CHECK-SPIRV-DAG: %[[TyGenPtrPtrLong:.*]] = OpTypePointer Generic %[[TyGenPtrLong]] ; CHECK-SPIRV-DAG: %[[TyFunGenPtrLongLong:.*]] = OpTypeFunction %[[TyVoid]] %[[TyGenPtrLong]] %[[TyLong]] +; CHECK-SPIRV-DAG: %[[TyChar:.*]] = OpTypeInt 8 0 +; CHECK-SPIRV-DAG: %[[TyGenPtrChar:.*]] = OpTypePointer Generic %[[TyChar]] +; CHECK-SPIRV-DAG: %[[TyGenPtrPtrChar:.*]] = OpTypePointer Generic %[[TyGenPtrChar]] +; CHECK-SPIRV-DAG: %[[TyFunPtrGenPtrChar:.*]] = OpTypePointer Function %[[TyGenPtrChar]] ; CHECK-SPIRV-DAG: %[[Const3:.*]] = OpConstant %[[TyLong]] 3 + ; CHECK-SPIRV: %[[FunTest]] = OpFunction %[[TyVoid]] None %[[TyFunPtrLong]] ; CHECK-SPIRV: %[[ArgCum]] = OpFunctionParameter %[[TyPtrLong]] + ; CHECK-SPIRV: OpFunctionCall %[[TyVoid]] %[[FooFunc]] %[[Addr]] %[[Const3]] + +; CHECK-SPIRV: %[[HalfAddr:.*]] = OpPtrCastToGeneric +; CHECK-SPIRV-NEXT: %[[HalfAddrCasted:.*]] = OpBitcast 
%[[TyGenPtrLong]] %[[HalfAddr]] +; CHECK-SPIRV-NEXT: OpFunctionCall %[[TyVoid]] %[[FooFunc]] %[[HalfAddrCasted]] %[[Const3]] + +; CHECK-SPIRV: %[[DblAddr:.*]] = OpPtrCastToGeneric +; CHECK-SPIRV-NEXT: %[[DblAddrCasted:.*]] = OpBitcast %[[TyGenPtrLong]] %[[DblAddr]] +; CHECK-SPIRV-NEXT: OpFunctionCall %[[TyVoid]] %[[FooFunc]] %[[DblAddrCasted]] %[[Const3]] + ; CHECK-SPIRV: %[[FooStub]] = OpFunction %[[TyVoid]] None %[[TyFunGenPtrLongLong]] ; CHECK-SPIRV: %[[StubObj]] = OpFunctionParameter %[[TyGenPtrLong]] ; CHECK-SPIRV: %[[MemOrder]] = OpFunctionParameter %[[TyLong]] + +; CHECK-SPIRV: %[[ObjectAddr:.*]] = OpVariable %[[TyFunPtrGenPtrChar]] Function +; CHECK-SPIRV-NEXT: %[[ToGeneric:.*]] = OpPtrCastToGeneric %[[TyGenPtrPtrChar]] %[[ObjectAddr]] +; CHECK-SPIRV-NEXT: %[[Casted:.*]] = OpBitcast %[[TyGenPtrPtrLong]] %[[ToGeneric]] +; CHECK-SPIRV-NEXT: OpStore %[[Casted]] %[[StubObj]] + ; CHECK-SPIRV: %[[FooFunc]] = OpFunction %[[TyVoid]] None %[[TyFunGenPtrLongLong]] ; CHECK-SPIRV: %[[FooObj]] = OpFunctionParameter %[[TyGenPtrLong]] ; CHECK-SPIRV: %[[FooMemOrder]] = OpFunctionParameter %[[TyLong]] + ; CHECK-SPIRV: OpFunctionCall %[[TyVoid]] %[[FooStub]] %[[FooObj]] %[[FooMemOrder]] define spir_kernel void @test(ptr addrspace(1) noundef align 4 %_arg_cum) { diff --git a/llvm/test/CodeGen/SPIRV/pointers/type-deduce-call-no-bitcast.ll b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-call-no-bitcast.ll new file mode 100644 index 0000000..edb31ff --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-call-no-bitcast.ll @@ -0,0 +1,60 @@ +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-SPIRV-DAG: OpName %[[Foo:.*]] "foo" +; CHECK-SPIRV-DAG: %[[TyChar:.*]] = OpTypeInt 8 0 +; CHECK-SPIRV-DAG: %[[TyVoid:.*]] = OpTypeVoid +; CHECK-SPIRV-DAG: %[[TyGenPtrChar:.*]] = OpTypePointer Generic %[[TyChar]] +; 
CHECK-SPIRV-DAG: %[[TyFunBar:.*]] = OpTypeFunction %[[TyVoid]] %[[TyGenPtrChar]] +; CHECK-SPIRV-DAG: %[[TyLong:.*]] = OpTypeInt 64 0 +; CHECK-SPIRV-DAG: %[[TyGenPtrPtrChar:.*]] = OpTypePointer Generic %[[TyGenPtrChar]] +; CHECK-SPIRV-DAG: %[[TyFunFoo:.*]] = OpTypeFunction %[[TyVoid]] %[[TyLong]] %[[TyGenPtrPtrChar]] %[[TyGenPtrPtrChar]] +; CHECK-SPIRV-DAG: %[[TyStruct:.*]] = OpTypeStruct %[[TyLong]] +; CHECK-SPIRV-DAG: %[[Const100:.*]] = OpConstant %[[TyLong]] 100 +; CHECK-SPIRV-DAG: %[[TyFunPtrGenPtrChar:.*]] = OpTypePointer Function %[[TyGenPtrChar]] +; CHECK-SPIRV-DAG: %[[TyPtrStruct:.*]] = OpTypePointer Generic %[[TyStruct]] +; CHECK-SPIRV-DAG: %[[TyPtrLong:.*]] = OpTypePointer Generic %[[TyLong]] + +; CHECK-SPIRV: %[[Bar:.*]] = OpFunction %[[TyVoid]] None %[[TyFunBar]] +; CHECK-SPIRV: %[[BarArg:.*]] = OpFunctionParameter %[[TyGenPtrChar]] +; CHECK-SPIRV-NEXT: OpLabel +; CHECK-SPIRV-NEXT: OpVariable %[[TyFunPtrGenPtrChar]] Function +; CHECK-SPIRV-NEXT: OpVariable %[[TyFunPtrGenPtrChar]] Function +; CHECK-SPIRV-NEXT: OpVariable %[[TyFunPtrGenPtrChar]] Function +; CHECK-SPIRV: %[[Var1:.*]] = OpPtrCastToGeneric %[[TyGenPtrPtrChar]] %[[#]] +; CHECK-SPIRV: %[[Var2:.*]] = OpPtrCastToGeneric %[[TyGenPtrPtrChar]] %[[#]] +; CHECK-SPIRV: OpStore %[[#]] %[[BarArg]] +; CHECK-SPIRV-NEXT: OpFunctionCall %[[TyVoid]] %[[Foo]] %[[Const100]] %[[Var1]] %[[Var2]] +; CHECK-SPIRV-NEXT: OpFunctionCall %[[TyVoid]] %[[Foo]] %[[Const100]] %[[Var2]] %[[Var1]] + +; CHECK-SPIRV: %[[Foo]] = OpFunction %[[TyVoid]] None %[[TyFunFoo]] +; CHECK-SPIRV-NEXT: OpFunctionParameter %[[TyLong]] +; CHECK-SPIRV-NEXT: OpFunctionParameter %[[TyGenPtrPtrChar]] +; CHECK-SPIRV-NEXT: OpFunctionParameter %[[TyGenPtrPtrChar]] + +%class.CustomType = type { i64 } + +define linkonce_odr dso_local spir_func void @bar(ptr addrspace(4) noundef %first) { +entry: + %first.addr = alloca ptr addrspace(4) + %first.addr.ascast = addrspacecast ptr %first.addr to ptr addrspace(4) + %temp = alloca ptr addrspace(4), align 8 + 
%temp.ascast = addrspacecast ptr %temp to ptr addrspace(4) + store ptr addrspace(4) %first, ptr %first.addr + call spir_func void @foo(i64 noundef 100, ptr addrspace(4) noundef dereferenceable(8) %first.addr.ascast, ptr addrspace(4) noundef dereferenceable(8) %temp.ascast) + call spir_func void @foo(i64 noundef 100, ptr addrspace(4) noundef dereferenceable(8) %temp.ascast, ptr addrspace(4) noundef dereferenceable(8) %first.addr.ascast) + %var = alloca ptr addrspace(4), align 8 + ret void +} + +define linkonce_odr dso_local spir_func void @foo(i64 noundef %offset, ptr addrspace(4) noundef dereferenceable(8) %in_acc1, ptr addrspace(4) noundef dereferenceable(8) %out_acc1) { +entry: + %r0 = load ptr addrspace(4), ptr addrspace(4) %in_acc1 + %arrayidx = getelementptr inbounds %class.CustomType, ptr addrspace(4) %r0, i64 42 + %r1 = load i64, ptr addrspace(4) %arrayidx + %r3 = load ptr addrspace(4), ptr addrspace(4) %out_acc1 + %r4 = getelementptr %class.CustomType, ptr addrspace(4) %r3, i64 43 + store i64 %r1, ptr addrspace(4) %r4 + ret void +} + diff --git a/llvm/test/CodeGen/WebAssembly/multi-return.ll b/llvm/test/CodeGen/WebAssembly/multi-return.ll index 3429cd5..293a1b3 100644 --- a/llvm/test/CodeGen/WebAssembly/multi-return.ll +++ b/llvm/test/CodeGen/WebAssembly/multi-return.ll @@ -78,18 +78,16 @@ define i64 @test4() { define { i64, i128 } @test5() { ; CHECK-LABEL: test5: ; CHECK: call return_multi_multi -; CHECK: i32.const $push8=, 8 -; CHECK: i32.add $push9=, $[[SP:[0-9]+]], $pop8 -; CHECK: i32.const $push0=, 16 -; CHECK: i32.add $push1=, $pop9, $pop0 +; CHECK: i32.const $push0=, 24 +; CHECK: i32.add $push1=, $[[SP:[0-9]+]], $pop0 ; CHECK: i64.load $[[L1:[0-9]+]]=, 0($pop1) ; CHECK: i64.load $[[L2:[0-9]+]]=, 8($[[SP]]) ; CHECK: i64.load $push2=, 16($[[SP]]) ; CHECK: i64.store 8($0), $pop2 +; CHECK: i64.store 16($0), $[[L1]] ; CHECK: i64.store 0($0), $[[L2]] -; CHECK: i32.const $push12=, 16 -; CHECK: i32.add $push3=, $0, $pop12 -; CHECK: i64.store 0($pop3), 
$[[L1]] +; CHECK: i32.const $push5=, 80 +; CHECK: i32.add $push6=, $3, $pop5 %t0 = call { i64, i128, i192, i128, i64 } @return_multi_multi() %r0 = extractvalue { i64, i128, i192, i128, i64 } %t0, 0 %r1 = extractvalue { i64, i128, i192, i128, i64 } %t0, 1 @@ -101,20 +99,20 @@ define { i64, i128 } @test5() { define { i128, i128 } @test6() { ; CHECK-LABEL: test6: ; CHECK: call return_multi_multi -; CHECK: i32.const $push0=, 64 +; CHECK: i32.const $push0=, 24 ; CHECK: i32.add $push1=, $[[SP:[0-9]+]], $pop0 ; CHECK: i64.load $[[L1:[0-9]+]]=, 0($pop1) -; CHECK: i32.const $push2=, 24 +; CHECK: i32.const $push2=, 64 ; CHECK: i32.add $push3=, $[[SP]], $pop2 ; CHECK: i64.load $[[L2:[0-9]+]]=, 0($pop3) ; CHECK: i64.load $[[L3:[0-9]+]]=, 16($[[SP]]) ; CHECK: i64.load $push4=, 56($[[SP]]) ; CHECK: i64.store 16($0), $pop4 +; CHECK: i64.store 24($0), $[[L2]] ; CHECK: i64.store 0($0), $[[L3]] -; CHECK: i64.store 8($0), $[[L2]] -; CHECK: i32.const $push5=, 24 -; CHECK: i32.add $push6=, $0, $pop5 -; CHECK: i64.store 0($pop6), $[[L1]] +; CHECK: i64.store 8($0), $[[L1]] +; CHECK: i32.const $push7=, 80 +; CHECK: i32.add $push8=, $4, $pop7 %t0 = call { i64, i128, i192, i128, i64 } @return_multi_multi() %r1 = extractvalue { i64, i128, i192, i128, i64 } %t0, 1 %r3 = extractvalue { i64, i128, i192, i128, i64 } %t0, 3 @@ -129,19 +127,17 @@ define { i64, i192 } @test7() { ; CHECK: i32.const $push0=, 40 ; CHECK: i32.add $push1=, $[[SP:[0-9]+]], $pop0 ; CHECK: i64.load $[[L1:[0-9]+]]=, 0($pop1) +; CHECK: i64.load $[[L2:[0-9]+]]=, 8($[[SP]]) +; CHECK: i64.load $[[L3:[0-9]+]]=, 32($[[SP]]) ; CHECK: i32.const $push2=, 48 ; CHECK: i32.add $push3=, $[[SP]], $pop2 -; CHECK: i64.load $[[L2:[0-9]+]]=, 0($pop3) -; CHECK: i64.load $[[L3:[0-9]+]]=, 8($[[SP]]) -; CHECK: i64.load $push4=, 32($[[SP]]) -; CHECK: i64.store 8($0), $pop4 -; CHECK: i64.store 0($0), $[[L3]] -; CHECK: i32.const $push5=, 24 -; CHECK: i32.add $push6=, $0, $pop5 -; CHECK: i64.store 0($pop6), $[[L2]] -; CHECK: i32.const $push7=, 16 -; 
CHECK: i32.add $push8=, $0, $pop7 -; CHECK: i64.store 0($pop8), $[[L1]] +; CHECK: i64.load $push4=, 0($pop3) +; CHECK: i64.store 24($0), $pop4 +; CHECK: i64.store 8($0), $[[L3]] +; CHECK: i64.store 16($0), $[[L1]] +; CHECK: i64.store 0($0), $[[L2]] +; CHECK: i32.const $push7=, 80 +; CHECK: i32.add $push8=, $4, $pop7 %t0 = call { i64, i128, i192, i128, i64 } @return_multi_multi() %r0 = extractvalue { i64, i128, i192, i128, i64 } %t0, 0 %r2 = extractvalue { i64, i128, i192, i128, i64 } %t0, 2 @@ -153,18 +149,16 @@ define { i64, i192 } @test7() { define { i128, i192, i128, i64 } @test8() { ; CHECK-LABEL: test8: ; CHECK: call return_multi_multi -; CHECK: i32.const $push18=, 8 -; CHECK: i32.add $push19=, $[[SP:[0-9]+]], $pop18 -; CHECK: i32.const $push0=, 32 -; CHECK: i32.add $push1=, $pop19, $pop0 +; CHECK: i32.const $push0=, 64 +; CHECK: i32.add $push1=, $[[SP:[0-9]+]], $pop0 ; CHECK: i64.load $[[L1:[0-9]+]]=, 0($pop1) -; CHECK: i32.const $push2=, 48 +; CHECK: i32.const $push2=, 40 ; CHECK: i32.add $push3=, $[[SP]], $pop2 ; CHECK: i64.load $[[L2:[0-9]+]]=, 0($pop3) -; CHECK: i32.const $push4=, 24 +; CHECK: i32.const $push4=, 48 ; CHECK: i32.add $push5=, $[[SP]], $pop4 ; CHECK: i64.load $[[L3:[0-9]+]]=, 0($pop5) -; CHECK: i32.const $push6=, 64 +; CHECK: i32.const $push6=, 24 ; CHECK: i32.add $push7=, $[[SP]], $pop6 ; CHECK: i64.load $[[L4:[0-9]+]]=, 0($pop7) ; CHECK: i64.load $[[L5:[0-9]+]]=, 8($[[SP]]) @@ -172,19 +166,15 @@ define { i128, i192, i128, i64 } @test8() { ; CHECK: i64.load $[[L7:[0-9]+]]=, 32($[[SP]]) ; CHECK: i64.load $push8=, 16($[[SP]]) ; CHECK: i64.store 40($0), $pop8 +; CHECK: i64.store 48($0), $[[L4]] +; CHECK: i64.store 32($0), $[[L3]] ; CHECK: i64.store 16($0), $[[L7]] +; CHECK: i64.store 24($0), $[[L2]] ; CHECK: i64.store 0($0), $[[L6]] -; CHECK: i64.store 8($0), $[[L4]] +; CHECK: i64.store 8($0), $[[L1]] ; CHECK: i64.store 56($0), $[[L5]] -; CHECK: i32.const $push9=, 48 -; CHECK: i32.add $push10=, $0, $pop9 -; CHECK: i64.store 0($pop10), $[[L3]] 
-; CHECK: i32.const $push22=, 32 -; CHECK: i32.add $push11=, $0, $pop22 -; CHECK: i64.store 0($pop11), $[[L2]] -; CHECK: i32.const $push12=, 24 -; CHECK: i32.add $push13=, $0, $pop12 -; CHECK: i64.store 0($pop13), $[[L1]] +; CHECK: i32.const $push11=, 80 +; CHECK: i32.add $push12=, $8, $pop11 %t0 = call { i64, i128, i192, i128, i64 } @return_multi_multi() %r0 = extractvalue { i64, i128, i192, i128, i64 } %t0, 0 %r1 = extractvalue { i64, i128, i192, i128, i64 } %t0, 1 diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll index 3a806b9..761a754 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll @@ -31,60 +31,38 @@ define <16 x i8> @add_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: add_v16i8: ; NO-SIMD128: .functype add_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.add $push0=, $9, $25 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop0 -; NO-SIMD128-NEXT: i32.add $push1=, $5, $21 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop1 -; NO-SIMD128-NEXT: i32.add $push2=, $3, $19 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-NEXT: i32.add $push3=, $2, $18 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop3 -; NO-SIMD128-NEXT: i32.add $push4=, $1, $17 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push6=, 15 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.add $push5=, $16, $32 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5 -; NO-SIMD128-NEXT: i32.const $push9=, 14 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.add $push8=, $15, $31 -; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 13 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.add $push11=, $14, $30 
-; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 12 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.add $push14=, $13, $29 -; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push18=, 11 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.add $push17=, $12, $28 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17 -; NO-SIMD128-NEXT: i32.const $push21=, 10 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.add $push20=, $11, $27 -; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push24=, 9 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.add $push23=, $10, $26 -; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 -; NO-SIMD128-NEXT: i32.const $push27=, 7 -; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-NEXT: i32.add $push26=, $8, $24 -; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26 -; NO-SIMD128-NEXT: i32.const $push30=, 6 -; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-NEXT: i32.add $push29=, $7, $23 -; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29 -; NO-SIMD128-NEXT: i32.const $push33=, 5 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.add $push32=, $6, $22 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.const $push36=, 3 -; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-NEXT: i32.add $push35=, $4, $20 -; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35 +; NO-SIMD128-NEXT: i32.add $push0=, $16, $32 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop0 +; NO-SIMD128-NEXT: i32.add $push1=, $15, $31 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop1 +; NO-SIMD128-NEXT: i32.add $push2=, $14, $30 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop2 +; NO-SIMD128-NEXT: i32.add $push3=, $13, $29 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop3 +; NO-SIMD128-NEXT: i32.add $push4=, $12, $28 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop4 
+; NO-SIMD128-NEXT: i32.add $push5=, $11, $27 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop5 +; NO-SIMD128-NEXT: i32.add $push6=, $10, $26 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop6 +; NO-SIMD128-NEXT: i32.add $push7=, $9, $25 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop7 +; NO-SIMD128-NEXT: i32.add $push8=, $8, $24 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop8 +; NO-SIMD128-NEXT: i32.add $push9=, $7, $23 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop9 +; NO-SIMD128-NEXT: i32.add $push10=, $6, $22 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop10 +; NO-SIMD128-NEXT: i32.add $push11=, $5, $21 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop11 +; NO-SIMD128-NEXT: i32.add $push12=, $4, $20 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop12 +; NO-SIMD128-NEXT: i32.add $push13=, $3, $19 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop13 +; NO-SIMD128-NEXT: i32.add $push14=, $2, $18 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop14 +; NO-SIMD128-NEXT: i32.add $push15=, $1, $17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop15 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: add_v16i8: @@ -96,54 +74,32 @@ define <16 x i8> @add_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.add $push2=, $3, $19 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $4, $20 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.add $push6=, $5, $21 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $6, $22 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.add $push12=, $7, $23 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 -; 
NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $8, $24 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 -; NO-SIMD128-FAST-NEXT: i32.add $push16=, $9, $25 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.add $push19=, $10, $26 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $11, $27 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-FAST-NEXT: i32.add $push25=, $12, $28 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $13, $29 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-FAST-NEXT: i32.add $push31=, $14, $30 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $15, $31 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.add $push37=, $16, $32 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: i32.add $push3=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, 
$6, $22 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.add $push6=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.add $push7=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.add $push9=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.add $push10=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.add $push12=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.add $push13=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop15 ; NO-SIMD128-FAST-NEXT: return %a = add <16 x i8> %x, %y ret <16 x i8> %a @@ -165,60 +121,38 @@ define <16 x i8> @sub_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: sub_v16i8: ; NO-SIMD128: .functype sub_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.sub $push0=, $9, $25 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop0 -; NO-SIMD128-NEXT: i32.sub $push1=, $5, $21 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop1 -; NO-SIMD128-NEXT: i32.sub $push2=, $3, $19 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-NEXT: i32.sub $push3=, $2, $18 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop3 -; NO-SIMD128-NEXT: i32.sub $push4=, $1, $17 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push6=, 15 -; NO-SIMD128-NEXT: 
i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.sub $push5=, $16, $32 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5 -; NO-SIMD128-NEXT: i32.const $push9=, 14 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.sub $push8=, $15, $31 -; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 13 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.sub $push11=, $14, $30 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 12 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.sub $push14=, $13, $29 -; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push18=, 11 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.sub $push17=, $12, $28 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17 -; NO-SIMD128-NEXT: i32.const $push21=, 10 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.sub $push20=, $11, $27 -; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push24=, 9 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.sub $push23=, $10, $26 -; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 -; NO-SIMD128-NEXT: i32.const $push27=, 7 -; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-NEXT: i32.sub $push26=, $8, $24 -; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26 -; NO-SIMD128-NEXT: i32.const $push30=, 6 -; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-NEXT: i32.sub $push29=, $7, $23 -; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29 -; NO-SIMD128-NEXT: i32.const $push33=, 5 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.sub $push32=, $6, $22 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.const $push36=, 3 -; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-NEXT: i32.sub $push35=, $4, $20 -; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35 +; 
NO-SIMD128-NEXT: i32.sub $push0=, $16, $32 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop0 +; NO-SIMD128-NEXT: i32.sub $push1=, $15, $31 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop1 +; NO-SIMD128-NEXT: i32.sub $push2=, $14, $30 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop2 +; NO-SIMD128-NEXT: i32.sub $push3=, $13, $29 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop3 +; NO-SIMD128-NEXT: i32.sub $push4=, $12, $28 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop4 +; NO-SIMD128-NEXT: i32.sub $push5=, $11, $27 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop5 +; NO-SIMD128-NEXT: i32.sub $push6=, $10, $26 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop6 +; NO-SIMD128-NEXT: i32.sub $push7=, $9, $25 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop7 +; NO-SIMD128-NEXT: i32.sub $push8=, $8, $24 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop8 +; NO-SIMD128-NEXT: i32.sub $push9=, $7, $23 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop9 +; NO-SIMD128-NEXT: i32.sub $push10=, $6, $22 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop10 +; NO-SIMD128-NEXT: i32.sub $push11=, $5, $21 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop11 +; NO-SIMD128-NEXT: i32.sub $push12=, $4, $20 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop12 +; NO-SIMD128-NEXT: i32.sub $push13=, $3, $19 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop13 +; NO-SIMD128-NEXT: i32.sub $push14=, $2, $18 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop14 +; NO-SIMD128-NEXT: i32.sub $push15=, $1, $17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop15 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: sub_v16i8: @@ -230,54 +164,32 @@ define <16 x i8> @sub_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $3, $19 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $4, $20 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $5, $21 -; 
NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $6, $22 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $7, $23 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.sub $push15=, $8, $24 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 -; NO-SIMD128-FAST-NEXT: i32.sub $push16=, $9, $25 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.sub $push19=, $10, $26 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-FAST-NEXT: i32.sub $push22=, $11, $27 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-FAST-NEXT: i32.sub $push25=, $12, $28 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.sub $push28=, $13, $29 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-FAST-NEXT: i32.sub $push31=, $14, $30 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.sub $push34=, $15, $31 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), 
$pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.sub $push37=, $16, $32 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.sub $push4=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.sub $push7=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.sub $push8=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.sub $push10=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.sub $push11=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.sub $push13=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.sub $push14=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.sub $push15=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop15 ; NO-SIMD128-FAST-NEXT: return %a = sub <16 x i8> %x, %y ret <16 x i8> %a @@ -425,60 +337,38 @@ define <16 x i8> @mul_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: mul_v16i8: ; NO-SIMD128: .functype mul_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.mul $push0=, $9, $25 -; 
NO-SIMD128-NEXT: i32.store8 8($0), $pop0 -; NO-SIMD128-NEXT: i32.mul $push1=, $5, $21 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop1 -; NO-SIMD128-NEXT: i32.mul $push2=, $3, $19 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-NEXT: i32.mul $push3=, $2, $18 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop3 -; NO-SIMD128-NEXT: i32.mul $push4=, $1, $17 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push6=, 15 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.mul $push5=, $16, $32 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5 -; NO-SIMD128-NEXT: i32.const $push9=, 14 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.mul $push8=, $15, $31 -; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 13 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.mul $push11=, $14, $30 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 12 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.mul $push14=, $13, $29 -; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push18=, 11 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.mul $push17=, $12, $28 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17 -; NO-SIMD128-NEXT: i32.const $push21=, 10 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.mul $push20=, $11, $27 -; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push24=, 9 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.mul $push23=, $10, $26 -; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 -; NO-SIMD128-NEXT: i32.const $push27=, 7 -; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-NEXT: i32.mul $push26=, $8, $24 -; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26 -; NO-SIMD128-NEXT: i32.const $push30=, 6 -; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-NEXT: 
i32.mul $push29=, $7, $23 -; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29 -; NO-SIMD128-NEXT: i32.const $push33=, 5 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.mul $push32=, $6, $22 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.const $push36=, 3 -; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-NEXT: i32.mul $push35=, $4, $20 -; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35 +; NO-SIMD128-NEXT: i32.mul $push0=, $16, $32 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop0 +; NO-SIMD128-NEXT: i32.mul $push1=, $15, $31 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop1 +; NO-SIMD128-NEXT: i32.mul $push2=, $14, $30 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop2 +; NO-SIMD128-NEXT: i32.mul $push3=, $13, $29 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop3 +; NO-SIMD128-NEXT: i32.mul $push4=, $12, $28 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop4 +; NO-SIMD128-NEXT: i32.mul $push5=, $11, $27 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop5 +; NO-SIMD128-NEXT: i32.mul $push6=, $10, $26 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop6 +; NO-SIMD128-NEXT: i32.mul $push7=, $9, $25 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop7 +; NO-SIMD128-NEXT: i32.mul $push8=, $8, $24 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop8 +; NO-SIMD128-NEXT: i32.mul $push9=, $7, $23 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop9 +; NO-SIMD128-NEXT: i32.mul $push10=, $6, $22 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop10 +; NO-SIMD128-NEXT: i32.mul $push11=, $5, $21 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop11 +; NO-SIMD128-NEXT: i32.mul $push12=, $4, $20 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop12 +; NO-SIMD128-NEXT: i32.mul $push13=, $3, $19 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop13 +; NO-SIMD128-NEXT: i32.mul $push14=, $2, $18 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop14 +; NO-SIMD128-NEXT: i32.mul $push15=, $1, $17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop15 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: mul_v16i8: @@ -490,54 +380,32 @@ define <16 x i8> 
@mul_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.mul $push2=, $3, $19 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $4, $20 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $5, $21 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $6, $22 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $7, $23 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.mul $push15=, $8, $24 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 -; NO-SIMD128-FAST-NEXT: i32.mul $push16=, $9, $25 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.mul $push19=, $10, $26 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-FAST-NEXT: i32.mul $push22=, $11, $27 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-FAST-NEXT: i32.mul $push25=, $12, $28 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.mul $push28=, $13, $29 -; NO-SIMD128-FAST-NEXT: 
i32.store8 0($pop27), $pop28 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-FAST-NEXT: i32.mul $push31=, $14, $30 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.mul $push34=, $15, $31 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.mul $push37=, $16, $32 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.mul $push4=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.mul $push7=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.mul $push8=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.mul $push10=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.mul $push11=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.mul $push13=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.mul $push14=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.mul $push15=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop15 ; NO-SIMD128-FAST-NEXT: return %a 
= mul <16 x i8> %x, %y ret <16 x i8> %a @@ -559,108 +427,86 @@ define <16 x i8> @min_s_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: min_s_v16i8: ; NO-SIMD128: .functype min_s_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push4=, 15 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 ; NO-SIMD128-NEXT: i32.extend8_s $push1=, $16 ; NO-SIMD128-NEXT: i32.extend8_s $push0=, $32 ; NO-SIMD128-NEXT: i32.lt_s $push2=, $pop1, $pop0 ; NO-SIMD128-NEXT: i32.select $push3=, $16, $32, $pop2 -; NO-SIMD128-NEXT: i32.store8 0($pop5), $pop3 -; NO-SIMD128-NEXT: i32.const $push10=, 14 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.extend8_s $push7=, $15 -; NO-SIMD128-NEXT: i32.extend8_s $push6=, $31 -; NO-SIMD128-NEXT: i32.lt_s $push8=, $pop7, $pop6 -; NO-SIMD128-NEXT: i32.select $push9=, $15, $31, $pop8 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9 -; NO-SIMD128-NEXT: i32.const $push16=, 13 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.extend8_s $push13=, $14 -; NO-SIMD128-NEXT: i32.extend8_s $push12=, $30 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop3 +; NO-SIMD128-NEXT: i32.extend8_s $push5=, $15 +; NO-SIMD128-NEXT: i32.extend8_s $push4=, $31 +; NO-SIMD128-NEXT: i32.lt_s $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.select $push7=, $15, $31, $pop6 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop7 +; NO-SIMD128-NEXT: i32.extend8_s $push9=, $14 +; NO-SIMD128-NEXT: i32.extend8_s $push8=, $30 +; NO-SIMD128-NEXT: i32.lt_s $push10=, $pop9, $pop8 +; NO-SIMD128-NEXT: i32.select $push11=, $14, $30, $pop10 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop11 +; NO-SIMD128-NEXT: i32.extend8_s $push13=, $13 +; NO-SIMD128-NEXT: i32.extend8_s $push12=, $29 ; NO-SIMD128-NEXT: i32.lt_s $push14=, $pop13, $pop12 -; NO-SIMD128-NEXT: i32.select $push15=, $14, $30, 
$pop14 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.const $push22=, 12 -; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22 -; NO-SIMD128-NEXT: i32.extend8_s $push19=, $13 -; NO-SIMD128-NEXT: i32.extend8_s $push18=, $29 -; NO-SIMD128-NEXT: i32.lt_s $push20=, $pop19, $pop18 -; NO-SIMD128-NEXT: i32.select $push21=, $13, $29, $pop20 -; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21 -; NO-SIMD128-NEXT: i32.const $push28=, 11 -; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28 -; NO-SIMD128-NEXT: i32.extend8_s $push25=, $12 -; NO-SIMD128-NEXT: i32.extend8_s $push24=, $28 +; NO-SIMD128-NEXT: i32.select $push15=, $13, $29, $pop14 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop15 +; NO-SIMD128-NEXT: i32.extend8_s $push17=, $12 +; NO-SIMD128-NEXT: i32.extend8_s $push16=, $28 +; NO-SIMD128-NEXT: i32.lt_s $push18=, $pop17, $pop16 +; NO-SIMD128-NEXT: i32.select $push19=, $12, $28, $pop18 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop19 +; NO-SIMD128-NEXT: i32.extend8_s $push21=, $11 +; NO-SIMD128-NEXT: i32.extend8_s $push20=, $27 +; NO-SIMD128-NEXT: i32.lt_s $push22=, $pop21, $pop20 +; NO-SIMD128-NEXT: i32.select $push23=, $11, $27, $pop22 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop23 +; NO-SIMD128-NEXT: i32.extend8_s $push25=, $10 +; NO-SIMD128-NEXT: i32.extend8_s $push24=, $26 ; NO-SIMD128-NEXT: i32.lt_s $push26=, $pop25, $pop24 -; NO-SIMD128-NEXT: i32.select $push27=, $12, $28, $pop26 -; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27 -; NO-SIMD128-NEXT: i32.const $push34=, 10 -; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 -; NO-SIMD128-NEXT: i32.extend8_s $push31=, $11 -; NO-SIMD128-NEXT: i32.extend8_s $push30=, $27 -; NO-SIMD128-NEXT: i32.lt_s $push32=, $pop31, $pop30 -; NO-SIMD128-NEXT: i32.select $push33=, $11, $27, $pop32 -; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 -; NO-SIMD128-NEXT: i32.const $push40=, 9 -; NO-SIMD128-NEXT: i32.add $push41=, $0, $pop40 -; NO-SIMD128-NEXT: i32.extend8_s $push37=, $10 -; NO-SIMD128-NEXT: i32.extend8_s $push36=, $26 +; 
NO-SIMD128-NEXT: i32.select $push27=, $10, $26, $pop26 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop27 +; NO-SIMD128-NEXT: i32.extend8_s $push29=, $9 +; NO-SIMD128-NEXT: i32.extend8_s $push28=, $25 +; NO-SIMD128-NEXT: i32.lt_s $push30=, $pop29, $pop28 +; NO-SIMD128-NEXT: i32.select $push31=, $9, $25, $pop30 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop31 +; NO-SIMD128-NEXT: i32.extend8_s $push33=, $8 +; NO-SIMD128-NEXT: i32.extend8_s $push32=, $24 +; NO-SIMD128-NEXT: i32.lt_s $push34=, $pop33, $pop32 +; NO-SIMD128-NEXT: i32.select $push35=, $8, $24, $pop34 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop35 +; NO-SIMD128-NEXT: i32.extend8_s $push37=, $7 +; NO-SIMD128-NEXT: i32.extend8_s $push36=, $23 ; NO-SIMD128-NEXT: i32.lt_s $push38=, $pop37, $pop36 -; NO-SIMD128-NEXT: i32.select $push39=, $10, $26, $pop38 -; NO-SIMD128-NEXT: i32.store8 0($pop41), $pop39 -; NO-SIMD128-NEXT: i32.extend8_s $push43=, $9 -; NO-SIMD128-NEXT: i32.extend8_s $push42=, $25 -; NO-SIMD128-NEXT: i32.lt_s $push44=, $pop43, $pop42 -; NO-SIMD128-NEXT: i32.select $push45=, $9, $25, $pop44 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop45 -; NO-SIMD128-NEXT: i32.const $push50=, 7 -; NO-SIMD128-NEXT: i32.add $push51=, $0, $pop50 -; NO-SIMD128-NEXT: i32.extend8_s $push47=, $8 -; NO-SIMD128-NEXT: i32.extend8_s $push46=, $24 -; NO-SIMD128-NEXT: i32.lt_s $push48=, $pop47, $pop46 -; NO-SIMD128-NEXT: i32.select $push49=, $8, $24, $pop48 -; NO-SIMD128-NEXT: i32.store8 0($pop51), $pop49 -; NO-SIMD128-NEXT: i32.const $push56=, 6 -; NO-SIMD128-NEXT: i32.add $push57=, $0, $pop56 -; NO-SIMD128-NEXT: i32.extend8_s $push53=, $7 -; NO-SIMD128-NEXT: i32.extend8_s $push52=, $23 +; NO-SIMD128-NEXT: i32.select $push39=, $7, $23, $pop38 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop39 +; NO-SIMD128-NEXT: i32.extend8_s $push41=, $6 +; NO-SIMD128-NEXT: i32.extend8_s $push40=, $22 +; NO-SIMD128-NEXT: i32.lt_s $push42=, $pop41, $pop40 +; NO-SIMD128-NEXT: i32.select $push43=, $6, $22, $pop42 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop43 +; 
NO-SIMD128-NEXT: i32.extend8_s $push45=, $5 +; NO-SIMD128-NEXT: i32.extend8_s $push44=, $21 +; NO-SIMD128-NEXT: i32.lt_s $push46=, $pop45, $pop44 +; NO-SIMD128-NEXT: i32.select $push47=, $5, $21, $pop46 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop47 +; NO-SIMD128-NEXT: i32.extend8_s $push49=, $4 +; NO-SIMD128-NEXT: i32.extend8_s $push48=, $20 +; NO-SIMD128-NEXT: i32.lt_s $push50=, $pop49, $pop48 +; NO-SIMD128-NEXT: i32.select $push51=, $4, $20, $pop50 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop51 +; NO-SIMD128-NEXT: i32.extend8_s $push53=, $3 +; NO-SIMD128-NEXT: i32.extend8_s $push52=, $19 ; NO-SIMD128-NEXT: i32.lt_s $push54=, $pop53, $pop52 -; NO-SIMD128-NEXT: i32.select $push55=, $7, $23, $pop54 -; NO-SIMD128-NEXT: i32.store8 0($pop57), $pop55 -; NO-SIMD128-NEXT: i32.const $push62=, 5 -; NO-SIMD128-NEXT: i32.add $push63=, $0, $pop62 -; NO-SIMD128-NEXT: i32.extend8_s $push59=, $6 -; NO-SIMD128-NEXT: i32.extend8_s $push58=, $22 -; NO-SIMD128-NEXT: i32.lt_s $push60=, $pop59, $pop58 -; NO-SIMD128-NEXT: i32.select $push61=, $6, $22, $pop60 -; NO-SIMD128-NEXT: i32.store8 0($pop63), $pop61 -; NO-SIMD128-NEXT: i32.extend8_s $push65=, $5 -; NO-SIMD128-NEXT: i32.extend8_s $push64=, $21 -; NO-SIMD128-NEXT: i32.lt_s $push66=, $pop65, $pop64 -; NO-SIMD128-NEXT: i32.select $push67=, $5, $21, $pop66 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop67 -; NO-SIMD128-NEXT: i32.const $push72=, 3 -; NO-SIMD128-NEXT: i32.add $push73=, $0, $pop72 -; NO-SIMD128-NEXT: i32.extend8_s $push69=, $4 -; NO-SIMD128-NEXT: i32.extend8_s $push68=, $20 -; NO-SIMD128-NEXT: i32.lt_s $push70=, $pop69, $pop68 -; NO-SIMD128-NEXT: i32.select $push71=, $4, $20, $pop70 -; NO-SIMD128-NEXT: i32.store8 0($pop73), $pop71 -; NO-SIMD128-NEXT: i32.extend8_s $push75=, $3 -; NO-SIMD128-NEXT: i32.extend8_s $push74=, $19 -; NO-SIMD128-NEXT: i32.lt_s $push76=, $pop75, $pop74 -; NO-SIMD128-NEXT: i32.select $push77=, $3, $19, $pop76 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop77 -; NO-SIMD128-NEXT: i32.extend8_s $push79=, $2 -; 
NO-SIMD128-NEXT: i32.extend8_s $push78=, $18 -; NO-SIMD128-NEXT: i32.lt_s $push80=, $pop79, $pop78 -; NO-SIMD128-NEXT: i32.select $push81=, $2, $18, $pop80 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop81 -; NO-SIMD128-NEXT: i32.extend8_s $push83=, $1 -; NO-SIMD128-NEXT: i32.extend8_s $push82=, $17 -; NO-SIMD128-NEXT: i32.lt_s $push84=, $pop83, $pop82 -; NO-SIMD128-NEXT: i32.select $push85=, $1, $17, $pop84 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop85 +; NO-SIMD128-NEXT: i32.select $push55=, $3, $19, $pop54 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop55 +; NO-SIMD128-NEXT: i32.extend8_s $push57=, $2 +; NO-SIMD128-NEXT: i32.extend8_s $push56=, $18 +; NO-SIMD128-NEXT: i32.lt_s $push58=, $pop57, $pop56 +; NO-SIMD128-NEXT: i32.select $push59=, $2, $18, $pop58 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop59 +; NO-SIMD128-NEXT: i32.extend8_s $push61=, $1 +; NO-SIMD128-NEXT: i32.extend8_s $push60=, $17 +; NO-SIMD128-NEXT: i32.lt_s $push62=, $pop61, $pop60 +; NO-SIMD128-NEXT: i32.select $push63=, $1, $17, $pop62 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop63 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: min_s_v16i8: @@ -681,93 +527,71 @@ define <16 x i8> @min_s_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: i32.lt_s $push10=, $pop9, $pop8 ; NO-SIMD128-FAST-NEXT: i32.select $push11=, $3, $19, $pop10 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push16=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $4 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $20 ; NO-SIMD128-FAST-NEXT: i32.lt_s $push14=, $pop13, $pop12 ; NO-SIMD128-FAST-NEXT: i32.select $push15=, $4, $20, $pop14 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop17), $pop15 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $5 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push18=, $21 -; NO-SIMD128-FAST-NEXT: i32.lt_s $push20=, $pop19, $pop18 -; NO-SIMD128-FAST-NEXT: i32.select $push21=, $5, $21, $pop20 -; NO-SIMD128-FAST-NEXT: 
i32.store8 4($0), $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push23=, $6 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push22=, $22 -; NO-SIMD128-FAST-NEXT: i32.lt_s $push24=, $pop23, $pop22 -; NO-SIMD128-FAST-NEXT: i32.select $push25=, $6, $22, $pop24 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $7 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push28=, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push17=, $5 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push16=, $21 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push18=, $pop17, $pop16 +; NO-SIMD128-FAST-NEXT: i32.select $push19=, $5, $21, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop19 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push21=, $6 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push20=, $22 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push22=, $pop21, $pop20 +; NO-SIMD128-FAST-NEXT: i32.select $push23=, $6, $22, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop23 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $7 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push24=, $23 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push26=, $pop25, $pop24 +; NO-SIMD128-FAST-NEXT: i32.select $push27=, $7, $23, $pop26 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop27 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $8 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push28=, $24 ; NO-SIMD128-FAST-NEXT: i32.lt_s $push30=, $pop29, $pop28 -; NO-SIMD128-FAST-NEXT: i32.select $push31=, $7, $23, $pop30 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push38=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push39=, $0, $pop38 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push35=, $8 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push34=, $24 -; NO-SIMD128-FAST-NEXT: 
i32.lt_s $push36=, $pop35, $pop34 -; NO-SIMD128-FAST-NEXT: i32.select $push37=, $8, $24, $pop36 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop39), $pop37 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push41=, $9 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push40=, $25 +; NO-SIMD128-FAST-NEXT: i32.select $push31=, $8, $24, $pop30 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop31 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push33=, $9 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push32=, $25 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push34=, $pop33, $pop32 +; NO-SIMD128-FAST-NEXT: i32.select $push35=, $9, $25, $pop34 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop35 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push37=, $10 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push36=, $26 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push38=, $pop37, $pop36 +; NO-SIMD128-FAST-NEXT: i32.select $push39=, $10, $26, $pop38 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop39 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push41=, $11 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push40=, $27 ; NO-SIMD128-FAST-NEXT: i32.lt_s $push42=, $pop41, $pop40 -; NO-SIMD128-FAST-NEXT: i32.select $push43=, $9, $25, $pop42 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop43 -; NO-SIMD128-FAST-NEXT: i32.const $push48=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push49=, $0, $pop48 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push45=, $10 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push44=, $26 +; NO-SIMD128-FAST-NEXT: i32.select $push43=, $11, $27, $pop42 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop43 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push45=, $12 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push44=, $28 ; NO-SIMD128-FAST-NEXT: i32.lt_s $push46=, $pop45, $pop44 -; NO-SIMD128-FAST-NEXT: i32.select $push47=, $10, $26, $pop46 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop49), $pop47 -; NO-SIMD128-FAST-NEXT: i32.const $push54=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push55=, $0, $pop54 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push51=, $11 -; NO-SIMD128-FAST-NEXT: i32.extend8_s 
$push50=, $27 -; NO-SIMD128-FAST-NEXT: i32.lt_s $push52=, $pop51, $pop50 -; NO-SIMD128-FAST-NEXT: i32.select $push53=, $11, $27, $pop52 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop55), $pop53 -; NO-SIMD128-FAST-NEXT: i32.const $push60=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push61=, $0, $pop60 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push57=, $12 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push56=, $28 +; NO-SIMD128-FAST-NEXT: i32.select $push47=, $12, $28, $pop46 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop47 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push49=, $13 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push48=, $29 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push50=, $pop49, $pop48 +; NO-SIMD128-FAST-NEXT: i32.select $push51=, $13, $29, $pop50 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop51 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push53=, $14 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push52=, $30 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push54=, $pop53, $pop52 +; NO-SIMD128-FAST-NEXT: i32.select $push55=, $14, $30, $pop54 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop55 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push57=, $15 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push56=, $31 ; NO-SIMD128-FAST-NEXT: i32.lt_s $push58=, $pop57, $pop56 -; NO-SIMD128-FAST-NEXT: i32.select $push59=, $12, $28, $pop58 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop61), $pop59 -; NO-SIMD128-FAST-NEXT: i32.const $push66=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push67=, $0, $pop66 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push63=, $13 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push62=, $29 -; NO-SIMD128-FAST-NEXT: i32.lt_s $push64=, $pop63, $pop62 -; NO-SIMD128-FAST-NEXT: i32.select $push65=, $13, $29, $pop64 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop67), $pop65 -; NO-SIMD128-FAST-NEXT: i32.const $push72=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push73=, $0, $pop72 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push69=, $14 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push68=, $30 -; NO-SIMD128-FAST-NEXT: i32.lt_s $push70=, $pop69, 
$pop68 -; NO-SIMD128-FAST-NEXT: i32.select $push71=, $14, $30, $pop70 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop73), $pop71 -; NO-SIMD128-FAST-NEXT: i32.const $push78=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push79=, $0, $pop78 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push75=, $15 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push74=, $31 -; NO-SIMD128-FAST-NEXT: i32.lt_s $push76=, $pop75, $pop74 -; NO-SIMD128-FAST-NEXT: i32.select $push77=, $15, $31, $pop76 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop79), $pop77 -; NO-SIMD128-FAST-NEXT: i32.const $push84=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push85=, $0, $pop84 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push81=, $16 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push80=, $32 -; NO-SIMD128-FAST-NEXT: i32.lt_s $push82=, $pop81, $pop80 -; NO-SIMD128-FAST-NEXT: i32.select $push83=, $16, $32, $pop82 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop85), $pop83 +; NO-SIMD128-FAST-NEXT: i32.select $push59=, $15, $31, $pop58 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop59 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push61=, $16 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push60=, $32 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push62=, $pop61, $pop60 +; NO-SIMD128-FAST-NEXT: i32.select $push63=, $16, $32, $pop62 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop63 ; NO-SIMD128-FAST-NEXT: return %c = icmp slt <16 x i8> %x, %y %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y @@ -790,140 +614,118 @@ define <16 x i8> @min_u_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: min_u_v16i8: ; NO-SIMD128: .functype min_u_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 15 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.const $push0=, 255 ; NO-SIMD128-NEXT: i32.and $push2=, $16, $pop0 -; NO-SIMD128-NEXT: i32.const $push117=, 255 -; NO-SIMD128-NEXT: 
i32.and $push1=, $32, $pop117 +; NO-SIMD128-NEXT: i32.const $push95=, 255 +; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop95 ; NO-SIMD128-NEXT: i32.lt_u $push3=, $pop2, $pop1 ; NO-SIMD128-NEXT: i32.select $push4=, $16, $32, $pop3 -; NO-SIMD128-NEXT: i32.store8 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push11=, 14 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.const $push116=, 255 -; NO-SIMD128-NEXT: i32.and $push8=, $15, $pop116 -; NO-SIMD128-NEXT: i32.const $push115=, 255 -; NO-SIMD128-NEXT: i32.and $push7=, $31, $pop115 -; NO-SIMD128-NEXT: i32.lt_u $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.select $push10=, $15, $31, $pop9 -; NO-SIMD128-NEXT: i32.store8 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push17=, 13 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.const $push114=, 255 -; NO-SIMD128-NEXT: i32.and $push14=, $14, $pop114 -; NO-SIMD128-NEXT: i32.const $push113=, 255 -; NO-SIMD128-NEXT: i32.and $push13=, $30, $pop113 -; NO-SIMD128-NEXT: i32.lt_u $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.select $push16=, $14, $30, $pop15 -; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push23=, 12 -; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-NEXT: i32.const $push112=, 255 -; NO-SIMD128-NEXT: i32.and $push20=, $13, $pop112 -; NO-SIMD128-NEXT: i32.const $push111=, 255 -; NO-SIMD128-NEXT: i32.and $push19=, $29, $pop111 -; NO-SIMD128-NEXT: i32.lt_u $push21=, $pop20, $pop19 -; NO-SIMD128-NEXT: i32.select $push22=, $13, $29, $pop21 -; NO-SIMD128-NEXT: i32.store8 0($pop24), $pop22 -; NO-SIMD128-NEXT: i32.const $push29=, 11 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.const $push110=, 255 -; NO-SIMD128-NEXT: i32.and $push26=, $12, $pop110 -; NO-SIMD128-NEXT: i32.const $push109=, 255 -; NO-SIMD128-NEXT: i32.and $push25=, $28, $pop109 -; NO-SIMD128-NEXT: i32.lt_u $push27=, $pop26, $pop25 -; NO-SIMD128-NEXT: i32.select $push28=, $12, $28, $pop27 
-; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push35=, 10 -; NO-SIMD128-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-NEXT: i32.const $push108=, 255 -; NO-SIMD128-NEXT: i32.and $push32=, $11, $pop108 -; NO-SIMD128-NEXT: i32.const $push107=, 255 -; NO-SIMD128-NEXT: i32.and $push31=, $27, $pop107 -; NO-SIMD128-NEXT: i32.lt_u $push33=, $pop32, $pop31 -; NO-SIMD128-NEXT: i32.select $push34=, $11, $27, $pop33 -; NO-SIMD128-NEXT: i32.store8 0($pop36), $pop34 -; NO-SIMD128-NEXT: i32.const $push41=, 9 -; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-NEXT: i32.const $push106=, 255 -; NO-SIMD128-NEXT: i32.and $push38=, $10, $pop106 -; NO-SIMD128-NEXT: i32.const $push105=, 255 -; NO-SIMD128-NEXT: i32.and $push37=, $26, $pop105 -; NO-SIMD128-NEXT: i32.lt_u $push39=, $pop38, $pop37 -; NO-SIMD128-NEXT: i32.select $push40=, $10, $26, $pop39 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-NEXT: i32.const $push104=, 255 -; NO-SIMD128-NEXT: i32.and $push44=, $9, $pop104 -; NO-SIMD128-NEXT: i32.const $push103=, 255 -; NO-SIMD128-NEXT: i32.and $push43=, $25, $pop103 -; NO-SIMD128-NEXT: i32.lt_u $push45=, $pop44, $pop43 -; NO-SIMD128-NEXT: i32.select $push46=, $9, $25, $pop45 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop46 -; NO-SIMD128-NEXT: i32.const $push51=, 7 -; NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51 -; NO-SIMD128-NEXT: i32.const $push102=, 255 -; NO-SIMD128-NEXT: i32.and $push48=, $8, $pop102 -; NO-SIMD128-NEXT: i32.const $push101=, 255 -; NO-SIMD128-NEXT: i32.and $push47=, $24, $pop101 -; NO-SIMD128-NEXT: i32.lt_u $push49=, $pop48, $pop47 -; NO-SIMD128-NEXT: i32.select $push50=, $8, $24, $pop49 -; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50 -; NO-SIMD128-NEXT: i32.const $push57=, 6 -; NO-SIMD128-NEXT: i32.add $push58=, $0, $pop57 -; NO-SIMD128-NEXT: i32.const $push100=, 255 -; NO-SIMD128-NEXT: i32.and $push54=, $7, $pop100 -; NO-SIMD128-NEXT: i32.const $push99=, 255 -; NO-SIMD128-NEXT: i32.and $push53=, $23, 
$pop99 -; NO-SIMD128-NEXT: i32.lt_u $push55=, $pop54, $pop53 -; NO-SIMD128-NEXT: i32.select $push56=, $7, $23, $pop55 -; NO-SIMD128-NEXT: i32.store8 0($pop58), $pop56 -; NO-SIMD128-NEXT: i32.const $push63=, 5 -; NO-SIMD128-NEXT: i32.add $push64=, $0, $pop63 -; NO-SIMD128-NEXT: i32.const $push98=, 255 -; NO-SIMD128-NEXT: i32.and $push60=, $6, $pop98 -; NO-SIMD128-NEXT: i32.const $push97=, 255 -; NO-SIMD128-NEXT: i32.and $push59=, $22, $pop97 -; NO-SIMD128-NEXT: i32.lt_u $push61=, $pop60, $pop59 -; NO-SIMD128-NEXT: i32.select $push62=, $6, $22, $pop61 -; NO-SIMD128-NEXT: i32.store8 0($pop64), $pop62 -; NO-SIMD128-NEXT: i32.const $push96=, 255 -; NO-SIMD128-NEXT: i32.and $push66=, $5, $pop96 -; NO-SIMD128-NEXT: i32.const $push95=, 255 -; NO-SIMD128-NEXT: i32.and $push65=, $21, $pop95 -; NO-SIMD128-NEXT: i32.lt_u $push67=, $pop66, $pop65 -; NO-SIMD128-NEXT: i32.select $push68=, $5, $21, $pop67 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop68 -; NO-SIMD128-NEXT: i32.const $push73=, 3 -; NO-SIMD128-NEXT: i32.add $push74=, $0, $pop73 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop4 ; NO-SIMD128-NEXT: i32.const $push94=, 255 -; NO-SIMD128-NEXT: i32.and $push70=, $4, $pop94 +; NO-SIMD128-NEXT: i32.and $push6=, $15, $pop94 ; NO-SIMD128-NEXT: i32.const $push93=, 255 -; NO-SIMD128-NEXT: i32.and $push69=, $20, $pop93 -; NO-SIMD128-NEXT: i32.lt_u $push71=, $pop70, $pop69 -; NO-SIMD128-NEXT: i32.select $push72=, $4, $20, $pop71 -; NO-SIMD128-NEXT: i32.store8 0($pop74), $pop72 +; NO-SIMD128-NEXT: i32.and $push5=, $31, $pop93 +; NO-SIMD128-NEXT: i32.lt_u $push7=, $pop6, $pop5 +; NO-SIMD128-NEXT: i32.select $push8=, $15, $31, $pop7 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop8 ; NO-SIMD128-NEXT: i32.const $push92=, 255 -; NO-SIMD128-NEXT: i32.and $push76=, $3, $pop92 +; NO-SIMD128-NEXT: i32.and $push10=, $14, $pop92 ; NO-SIMD128-NEXT: i32.const $push91=, 255 -; NO-SIMD128-NEXT: i32.and $push75=, $19, $pop91 -; NO-SIMD128-NEXT: i32.lt_u $push77=, $pop76, $pop75 -; NO-SIMD128-NEXT: i32.select 
$push78=, $3, $19, $pop77 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop78 +; NO-SIMD128-NEXT: i32.and $push9=, $30, $pop91 +; NO-SIMD128-NEXT: i32.lt_u $push11=, $pop10, $pop9 +; NO-SIMD128-NEXT: i32.select $push12=, $14, $30, $pop11 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop12 ; NO-SIMD128-NEXT: i32.const $push90=, 255 -; NO-SIMD128-NEXT: i32.and $push80=, $2, $pop90 +; NO-SIMD128-NEXT: i32.and $push14=, $13, $pop90 ; NO-SIMD128-NEXT: i32.const $push89=, 255 -; NO-SIMD128-NEXT: i32.and $push79=, $18, $pop89 -; NO-SIMD128-NEXT: i32.lt_u $push81=, $pop80, $pop79 -; NO-SIMD128-NEXT: i32.select $push82=, $2, $18, $pop81 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop82 +; NO-SIMD128-NEXT: i32.and $push13=, $29, $pop89 +; NO-SIMD128-NEXT: i32.lt_u $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.select $push16=, $13, $29, $pop15 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop16 ; NO-SIMD128-NEXT: i32.const $push88=, 255 -; NO-SIMD128-NEXT: i32.and $push84=, $1, $pop88 +; NO-SIMD128-NEXT: i32.and $push18=, $12, $pop88 ; NO-SIMD128-NEXT: i32.const $push87=, 255 -; NO-SIMD128-NEXT: i32.and $push83=, $17, $pop87 -; NO-SIMD128-NEXT: i32.lt_u $push85=, $pop84, $pop83 -; NO-SIMD128-NEXT: i32.select $push86=, $1, $17, $pop85 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop86 +; NO-SIMD128-NEXT: i32.and $push17=, $28, $pop87 +; NO-SIMD128-NEXT: i32.lt_u $push19=, $pop18, $pop17 +; NO-SIMD128-NEXT: i32.select $push20=, $12, $28, $pop19 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop20 +; NO-SIMD128-NEXT: i32.const $push86=, 255 +; NO-SIMD128-NEXT: i32.and $push22=, $11, $pop86 +; NO-SIMD128-NEXT: i32.const $push85=, 255 +; NO-SIMD128-NEXT: i32.and $push21=, $27, $pop85 +; NO-SIMD128-NEXT: i32.lt_u $push23=, $pop22, $pop21 +; NO-SIMD128-NEXT: i32.select $push24=, $11, $27, $pop23 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop24 +; NO-SIMD128-NEXT: i32.const $push84=, 255 +; NO-SIMD128-NEXT: i32.and $push26=, $10, $pop84 +; NO-SIMD128-NEXT: i32.const $push83=, 255 +; NO-SIMD128-NEXT: i32.and $push25=, 
$26, $pop83 +; NO-SIMD128-NEXT: i32.lt_u $push27=, $pop26, $pop25 +; NO-SIMD128-NEXT: i32.select $push28=, $10, $26, $pop27 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop28 +; NO-SIMD128-NEXT: i32.const $push82=, 255 +; NO-SIMD128-NEXT: i32.and $push30=, $9, $pop82 +; NO-SIMD128-NEXT: i32.const $push81=, 255 +; NO-SIMD128-NEXT: i32.and $push29=, $25, $pop81 +; NO-SIMD128-NEXT: i32.lt_u $push31=, $pop30, $pop29 +; NO-SIMD128-NEXT: i32.select $push32=, $9, $25, $pop31 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop32 +; NO-SIMD128-NEXT: i32.const $push80=, 255 +; NO-SIMD128-NEXT: i32.and $push34=, $8, $pop80 +; NO-SIMD128-NEXT: i32.const $push79=, 255 +; NO-SIMD128-NEXT: i32.and $push33=, $24, $pop79 +; NO-SIMD128-NEXT: i32.lt_u $push35=, $pop34, $pop33 +; NO-SIMD128-NEXT: i32.select $push36=, $8, $24, $pop35 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop36 +; NO-SIMD128-NEXT: i32.const $push78=, 255 +; NO-SIMD128-NEXT: i32.and $push38=, $7, $pop78 +; NO-SIMD128-NEXT: i32.const $push77=, 255 +; NO-SIMD128-NEXT: i32.and $push37=, $23, $pop77 +; NO-SIMD128-NEXT: i32.lt_u $push39=, $pop38, $pop37 +; NO-SIMD128-NEXT: i32.select $push40=, $7, $23, $pop39 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop40 +; NO-SIMD128-NEXT: i32.const $push76=, 255 +; NO-SIMD128-NEXT: i32.and $push42=, $6, $pop76 +; NO-SIMD128-NEXT: i32.const $push75=, 255 +; NO-SIMD128-NEXT: i32.and $push41=, $22, $pop75 +; NO-SIMD128-NEXT: i32.lt_u $push43=, $pop42, $pop41 +; NO-SIMD128-NEXT: i32.select $push44=, $6, $22, $pop43 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop44 +; NO-SIMD128-NEXT: i32.const $push74=, 255 +; NO-SIMD128-NEXT: i32.and $push46=, $5, $pop74 +; NO-SIMD128-NEXT: i32.const $push73=, 255 +; NO-SIMD128-NEXT: i32.and $push45=, $21, $pop73 +; NO-SIMD128-NEXT: i32.lt_u $push47=, $pop46, $pop45 +; NO-SIMD128-NEXT: i32.select $push48=, $5, $21, $pop47 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop48 +; NO-SIMD128-NEXT: i32.const $push72=, 255 +; NO-SIMD128-NEXT: i32.and $push50=, $4, $pop72 +; NO-SIMD128-NEXT: 
i32.const $push71=, 255 +; NO-SIMD128-NEXT: i32.and $push49=, $20, $pop71 +; NO-SIMD128-NEXT: i32.lt_u $push51=, $pop50, $pop49 +; NO-SIMD128-NEXT: i32.select $push52=, $4, $20, $pop51 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop52 +; NO-SIMD128-NEXT: i32.const $push70=, 255 +; NO-SIMD128-NEXT: i32.and $push54=, $3, $pop70 +; NO-SIMD128-NEXT: i32.const $push69=, 255 +; NO-SIMD128-NEXT: i32.and $push53=, $19, $pop69 +; NO-SIMD128-NEXT: i32.lt_u $push55=, $pop54, $pop53 +; NO-SIMD128-NEXT: i32.select $push56=, $3, $19, $pop55 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop56 +; NO-SIMD128-NEXT: i32.const $push68=, 255 +; NO-SIMD128-NEXT: i32.and $push58=, $2, $pop68 +; NO-SIMD128-NEXT: i32.const $push67=, 255 +; NO-SIMD128-NEXT: i32.and $push57=, $18, $pop67 +; NO-SIMD128-NEXT: i32.lt_u $push59=, $pop58, $pop57 +; NO-SIMD128-NEXT: i32.select $push60=, $2, $18, $pop59 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop60 +; NO-SIMD128-NEXT: i32.const $push66=, 255 +; NO-SIMD128-NEXT: i32.and $push62=, $1, $pop66 +; NO-SIMD128-NEXT: i32.const $push65=, 255 +; NO-SIMD128-NEXT: i32.and $push61=, $17, $pop65 +; NO-SIMD128-NEXT: i32.lt_u $push63=, $pop62, $pop61 +; NO-SIMD128-NEXT: i32.select $push64=, $1, $17, $pop63 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop64 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: min_u_v16i8: @@ -931,138 +733,116 @@ define <16 x i8> @min_u_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push117=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop117 +; NO-SIMD128-FAST-NEXT: i32.const $push95=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop95 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.select $push4=, $1, $17, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push116=, 255 -; NO-SIMD128-FAST-NEXT: i32.and 
$push6=, $2, $pop116 -; NO-SIMD128-FAST-NEXT: i32.const $push115=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $18, $pop115 +; NO-SIMD128-FAST-NEXT: i32.const $push94=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop94 +; NO-SIMD128-FAST-NEXT: i32.const $push93=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $18, $pop93 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push7=, $pop6, $pop5 ; NO-SIMD128-FAST-NEXT: i32.select $push8=, $2, $18, $pop7 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push114=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop114 -; NO-SIMD128-FAST-NEXT: i32.const $push113=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push9=, $19, $pop113 +; NO-SIMD128-FAST-NEXT: i32.const $push92=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop92 +; NO-SIMD128-FAST-NEXT: i32.const $push91=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $19, $pop91 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push11=, $pop10, $pop9 ; NO-SIMD128-FAST-NEXT: i32.select $push12=, $3, $19, $pop11 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push112=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop112 -; NO-SIMD128-FAST-NEXT: i32.const $push111=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push13=, $20, $pop111 +; NO-SIMD128-FAST-NEXT: i32.const $push90=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop90 +; NO-SIMD128-FAST-NEXT: i32.const $push89=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $20, $pop89 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push15=, $pop14, $pop13 ; NO-SIMD128-FAST-NEXT: i32.select $push16=, $4, $20, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push110=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $5, $pop110 -; NO-SIMD128-FAST-NEXT: i32.const $push109=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $21, $pop109 -; 
NO-SIMD128-FAST-NEXT: i32.lt_u $push21=, $pop20, $pop19 -; NO-SIMD128-FAST-NEXT: i32.select $push22=, $5, $21, $pop21 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.const $push108=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push24=, $6, $pop108 -; NO-SIMD128-FAST-NEXT: i32.const $push107=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $22, $pop107 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push25=, $pop24, $pop23 -; NO-SIMD128-FAST-NEXT: i32.select $push26=, $6, $22, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.const $push106=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push30=, $7, $pop106 -; NO-SIMD128-FAST-NEXT: i32.const $push105=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $23, $pop105 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push88=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $5, $pop88 +; NO-SIMD128-FAST-NEXT: i32.const $push87=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $21, $pop87 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push19=, $pop18, $pop17 +; NO-SIMD128-FAST-NEXT: i32.select $push20=, $5, $21, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push86=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $6, $pop86 +; NO-SIMD128-FAST-NEXT: i32.const $push85=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $22, $pop85 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push23=, $pop22, $pop21 +; NO-SIMD128-FAST-NEXT: i32.select $push24=, $6, $22, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push84=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push26=, $7, $pop84 +; NO-SIMD128-FAST-NEXT: i32.const $push83=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $23, $pop83 +; 
NO-SIMD128-FAST-NEXT: i32.lt_u $push27=, $pop26, $pop25 +; NO-SIMD128-FAST-NEXT: i32.select $push28=, $7, $23, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push82=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push30=, $8, $pop82 +; NO-SIMD128-FAST-NEXT: i32.const $push81=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $24, $pop81 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push31=, $pop30, $pop29 -; NO-SIMD128-FAST-NEXT: i32.select $push32=, $7, $23, $pop31 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.const $push104=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push36=, $8, $pop104 -; NO-SIMD128-FAST-NEXT: i32.const $push103=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push35=, $24, $pop103 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push37=, $pop36, $pop35 -; NO-SIMD128-FAST-NEXT: i32.select $push38=, $8, $24, $pop37 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop38 -; NO-SIMD128-FAST-NEXT: i32.const $push102=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push42=, $9, $pop102 -; NO-SIMD128-FAST-NEXT: i32.const $push101=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push41=, $25, $pop101 +; NO-SIMD128-FAST-NEXT: i32.select $push32=, $8, $24, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push80=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push34=, $9, $pop80 +; NO-SIMD128-FAST-NEXT: i32.const $push79=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push33=, $25, $pop79 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push35=, $pop34, $pop33 +; NO-SIMD128-FAST-NEXT: i32.select $push36=, $9, $25, $pop35 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push78=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push38=, $10, $pop78 +; NO-SIMD128-FAST-NEXT: i32.const $push77=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $26, $pop77 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push39=, $pop38, 
$pop37 +; NO-SIMD128-FAST-NEXT: i32.select $push40=, $10, $26, $pop39 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop40 +; NO-SIMD128-FAST-NEXT: i32.const $push76=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push42=, $11, $pop76 +; NO-SIMD128-FAST-NEXT: i32.const $push75=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push41=, $27, $pop75 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push43=, $pop42, $pop41 -; NO-SIMD128-FAST-NEXT: i32.select $push44=, $9, $25, $pop43 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-FAST-NEXT: i32.const $push100=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push46=, $10, $pop100 -; NO-SIMD128-FAST-NEXT: i32.const $push99=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push45=, $26, $pop99 +; NO-SIMD128-FAST-NEXT: i32.select $push44=, $11, $27, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push74=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push46=, $12, $pop74 +; NO-SIMD128-FAST-NEXT: i32.const $push73=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push45=, $28, $pop73 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push47=, $pop46, $pop45 -; NO-SIMD128-FAST-NEXT: i32.select $push48=, $10, $26, $pop47 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push56=, $0, $pop55 -; NO-SIMD128-FAST-NEXT: i32.const $push98=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push52=, $11, $pop98 -; NO-SIMD128-FAST-NEXT: i32.const $push97=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push51=, $27, $pop97 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push53=, $pop52, $pop51 -; NO-SIMD128-FAST-NEXT: i32.select $push54=, $11, $27, $pop53 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop56), $pop54 -; NO-SIMD128-FAST-NEXT: i32.const $push61=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push62=, $0, $pop61 -; NO-SIMD128-FAST-NEXT: i32.const $push96=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push58=, $12, $pop96 
-; NO-SIMD128-FAST-NEXT: i32.const $push95=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push57=, $28, $pop95 +; NO-SIMD128-FAST-NEXT: i32.select $push48=, $12, $28, $pop47 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop48 +; NO-SIMD128-FAST-NEXT: i32.const $push72=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push50=, $13, $pop72 +; NO-SIMD128-FAST-NEXT: i32.const $push71=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push49=, $29, $pop71 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push51=, $pop50, $pop49 +; NO-SIMD128-FAST-NEXT: i32.select $push52=, $13, $29, $pop51 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop52 +; NO-SIMD128-FAST-NEXT: i32.const $push70=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push54=, $14, $pop70 +; NO-SIMD128-FAST-NEXT: i32.const $push69=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push53=, $30, $pop69 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push55=, $pop54, $pop53 +; NO-SIMD128-FAST-NEXT: i32.select $push56=, $14, $30, $pop55 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop56 +; NO-SIMD128-FAST-NEXT: i32.const $push68=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push58=, $15, $pop68 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push57=, $31, $pop67 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push59=, $pop58, $pop57 -; NO-SIMD128-FAST-NEXT: i32.select $push60=, $12, $28, $pop59 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop62), $pop60 -; NO-SIMD128-FAST-NEXT: i32.const $push67=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push68=, $0, $pop67 -; NO-SIMD128-FAST-NEXT: i32.const $push94=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push64=, $13, $pop94 -; NO-SIMD128-FAST-NEXT: i32.const $push93=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push63=, $29, $pop93 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push65=, $pop64, $pop63 -; NO-SIMD128-FAST-NEXT: i32.select $push66=, $13, $29, $pop65 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop68), $pop66 -; NO-SIMD128-FAST-NEXT: i32.const $push73=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push74=, $0, $pop73 -; NO-SIMD128-FAST-NEXT: i32.const $push92=, 255 
-; NO-SIMD128-FAST-NEXT: i32.and $push70=, $14, $pop92 -; NO-SIMD128-FAST-NEXT: i32.const $push91=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push69=, $30, $pop91 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push71=, $pop70, $pop69 -; NO-SIMD128-FAST-NEXT: i32.select $push72=, $14, $30, $pop71 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop74), $pop72 -; NO-SIMD128-FAST-NEXT: i32.const $push79=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push80=, $0, $pop79 -; NO-SIMD128-FAST-NEXT: i32.const $push90=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push76=, $15, $pop90 -; NO-SIMD128-FAST-NEXT: i32.const $push89=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push75=, $31, $pop89 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push77=, $pop76, $pop75 -; NO-SIMD128-FAST-NEXT: i32.select $push78=, $15, $31, $pop77 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop80), $pop78 -; NO-SIMD128-FAST-NEXT: i32.const $push85=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push86=, $0, $pop85 -; NO-SIMD128-FAST-NEXT: i32.const $push88=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push82=, $16, $pop88 -; NO-SIMD128-FAST-NEXT: i32.const $push87=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push81=, $32, $pop87 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push83=, $pop82, $pop81 -; NO-SIMD128-FAST-NEXT: i32.select $push84=, $16, $32, $pop83 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop86), $pop84 +; NO-SIMD128-FAST-NEXT: i32.select $push60=, $15, $31, $pop59 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop60 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push62=, $16, $pop66 +; NO-SIMD128-FAST-NEXT: i32.const $push65=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push61=, $32, $pop65 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push63=, $pop62, $pop61 +; NO-SIMD128-FAST-NEXT: i32.select $push64=, $16, $32, $pop63 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop64 ; NO-SIMD128-FAST-NEXT: return %c = icmp ult <16 x i8> %x, %y %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y @@ -1085,108 +865,86 @@ define <16 x i8> @max_s_v16i8(<16 x i8> %x, <16 x i8> %y) 
{ ; NO-SIMD128-LABEL: max_s_v16i8: ; NO-SIMD128: .functype max_s_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push4=, 15 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 ; NO-SIMD128-NEXT: i32.extend8_s $push1=, $16 ; NO-SIMD128-NEXT: i32.extend8_s $push0=, $32 ; NO-SIMD128-NEXT: i32.gt_s $push2=, $pop1, $pop0 ; NO-SIMD128-NEXT: i32.select $push3=, $16, $32, $pop2 -; NO-SIMD128-NEXT: i32.store8 0($pop5), $pop3 -; NO-SIMD128-NEXT: i32.const $push10=, 14 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.extend8_s $push7=, $15 -; NO-SIMD128-NEXT: i32.extend8_s $push6=, $31 -; NO-SIMD128-NEXT: i32.gt_s $push8=, $pop7, $pop6 -; NO-SIMD128-NEXT: i32.select $push9=, $15, $31, $pop8 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9 -; NO-SIMD128-NEXT: i32.const $push16=, 13 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.extend8_s $push13=, $14 -; NO-SIMD128-NEXT: i32.extend8_s $push12=, $30 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop3 +; NO-SIMD128-NEXT: i32.extend8_s $push5=, $15 +; NO-SIMD128-NEXT: i32.extend8_s $push4=, $31 +; NO-SIMD128-NEXT: i32.gt_s $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.select $push7=, $15, $31, $pop6 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop7 +; NO-SIMD128-NEXT: i32.extend8_s $push9=, $14 +; NO-SIMD128-NEXT: i32.extend8_s $push8=, $30 +; NO-SIMD128-NEXT: i32.gt_s $push10=, $pop9, $pop8 +; NO-SIMD128-NEXT: i32.select $push11=, $14, $30, $pop10 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop11 +; NO-SIMD128-NEXT: i32.extend8_s $push13=, $13 +; NO-SIMD128-NEXT: i32.extend8_s $push12=, $29 ; NO-SIMD128-NEXT: i32.gt_s $push14=, $pop13, $pop12 -; NO-SIMD128-NEXT: i32.select $push15=, $14, $30, $pop14 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.const $push22=, 12 -; NO-SIMD128-NEXT: 
i32.add $push23=, $0, $pop22 -; NO-SIMD128-NEXT: i32.extend8_s $push19=, $13 -; NO-SIMD128-NEXT: i32.extend8_s $push18=, $29 -; NO-SIMD128-NEXT: i32.gt_s $push20=, $pop19, $pop18 -; NO-SIMD128-NEXT: i32.select $push21=, $13, $29, $pop20 -; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21 -; NO-SIMD128-NEXT: i32.const $push28=, 11 -; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28 -; NO-SIMD128-NEXT: i32.extend8_s $push25=, $12 -; NO-SIMD128-NEXT: i32.extend8_s $push24=, $28 +; NO-SIMD128-NEXT: i32.select $push15=, $13, $29, $pop14 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop15 +; NO-SIMD128-NEXT: i32.extend8_s $push17=, $12 +; NO-SIMD128-NEXT: i32.extend8_s $push16=, $28 +; NO-SIMD128-NEXT: i32.gt_s $push18=, $pop17, $pop16 +; NO-SIMD128-NEXT: i32.select $push19=, $12, $28, $pop18 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop19 +; NO-SIMD128-NEXT: i32.extend8_s $push21=, $11 +; NO-SIMD128-NEXT: i32.extend8_s $push20=, $27 +; NO-SIMD128-NEXT: i32.gt_s $push22=, $pop21, $pop20 +; NO-SIMD128-NEXT: i32.select $push23=, $11, $27, $pop22 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop23 +; NO-SIMD128-NEXT: i32.extend8_s $push25=, $10 +; NO-SIMD128-NEXT: i32.extend8_s $push24=, $26 ; NO-SIMD128-NEXT: i32.gt_s $push26=, $pop25, $pop24 -; NO-SIMD128-NEXT: i32.select $push27=, $12, $28, $pop26 -; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27 -; NO-SIMD128-NEXT: i32.const $push34=, 10 -; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 -; NO-SIMD128-NEXT: i32.extend8_s $push31=, $11 -; NO-SIMD128-NEXT: i32.extend8_s $push30=, $27 -; NO-SIMD128-NEXT: i32.gt_s $push32=, $pop31, $pop30 -; NO-SIMD128-NEXT: i32.select $push33=, $11, $27, $pop32 -; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 -; NO-SIMD128-NEXT: i32.const $push40=, 9 -; NO-SIMD128-NEXT: i32.add $push41=, $0, $pop40 -; NO-SIMD128-NEXT: i32.extend8_s $push37=, $10 -; NO-SIMD128-NEXT: i32.extend8_s $push36=, $26 +; NO-SIMD128-NEXT: i32.select $push27=, $10, $26, $pop26 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop27 +; 
NO-SIMD128-NEXT: i32.extend8_s $push29=, $9 +; NO-SIMD128-NEXT: i32.extend8_s $push28=, $25 +; NO-SIMD128-NEXT: i32.gt_s $push30=, $pop29, $pop28 +; NO-SIMD128-NEXT: i32.select $push31=, $9, $25, $pop30 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop31 +; NO-SIMD128-NEXT: i32.extend8_s $push33=, $8 +; NO-SIMD128-NEXT: i32.extend8_s $push32=, $24 +; NO-SIMD128-NEXT: i32.gt_s $push34=, $pop33, $pop32 +; NO-SIMD128-NEXT: i32.select $push35=, $8, $24, $pop34 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop35 +; NO-SIMD128-NEXT: i32.extend8_s $push37=, $7 +; NO-SIMD128-NEXT: i32.extend8_s $push36=, $23 ; NO-SIMD128-NEXT: i32.gt_s $push38=, $pop37, $pop36 -; NO-SIMD128-NEXT: i32.select $push39=, $10, $26, $pop38 -; NO-SIMD128-NEXT: i32.store8 0($pop41), $pop39 -; NO-SIMD128-NEXT: i32.extend8_s $push43=, $9 -; NO-SIMD128-NEXT: i32.extend8_s $push42=, $25 -; NO-SIMD128-NEXT: i32.gt_s $push44=, $pop43, $pop42 -; NO-SIMD128-NEXT: i32.select $push45=, $9, $25, $pop44 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop45 -; NO-SIMD128-NEXT: i32.const $push50=, 7 -; NO-SIMD128-NEXT: i32.add $push51=, $0, $pop50 -; NO-SIMD128-NEXT: i32.extend8_s $push47=, $8 -; NO-SIMD128-NEXT: i32.extend8_s $push46=, $24 -; NO-SIMD128-NEXT: i32.gt_s $push48=, $pop47, $pop46 -; NO-SIMD128-NEXT: i32.select $push49=, $8, $24, $pop48 -; NO-SIMD128-NEXT: i32.store8 0($pop51), $pop49 -; NO-SIMD128-NEXT: i32.const $push56=, 6 -; NO-SIMD128-NEXT: i32.add $push57=, $0, $pop56 -; NO-SIMD128-NEXT: i32.extend8_s $push53=, $7 -; NO-SIMD128-NEXT: i32.extend8_s $push52=, $23 +; NO-SIMD128-NEXT: i32.select $push39=, $7, $23, $pop38 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop39 +; NO-SIMD128-NEXT: i32.extend8_s $push41=, $6 +; NO-SIMD128-NEXT: i32.extend8_s $push40=, $22 +; NO-SIMD128-NEXT: i32.gt_s $push42=, $pop41, $pop40 +; NO-SIMD128-NEXT: i32.select $push43=, $6, $22, $pop42 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop43 +; NO-SIMD128-NEXT: i32.extend8_s $push45=, $5 +; NO-SIMD128-NEXT: i32.extend8_s $push44=, $21 +; 
NO-SIMD128-NEXT: i32.gt_s $push46=, $pop45, $pop44 +; NO-SIMD128-NEXT: i32.select $push47=, $5, $21, $pop46 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop47 +; NO-SIMD128-NEXT: i32.extend8_s $push49=, $4 +; NO-SIMD128-NEXT: i32.extend8_s $push48=, $20 +; NO-SIMD128-NEXT: i32.gt_s $push50=, $pop49, $pop48 +; NO-SIMD128-NEXT: i32.select $push51=, $4, $20, $pop50 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop51 +; NO-SIMD128-NEXT: i32.extend8_s $push53=, $3 +; NO-SIMD128-NEXT: i32.extend8_s $push52=, $19 ; NO-SIMD128-NEXT: i32.gt_s $push54=, $pop53, $pop52 -; NO-SIMD128-NEXT: i32.select $push55=, $7, $23, $pop54 -; NO-SIMD128-NEXT: i32.store8 0($pop57), $pop55 -; NO-SIMD128-NEXT: i32.const $push62=, 5 -; NO-SIMD128-NEXT: i32.add $push63=, $0, $pop62 -; NO-SIMD128-NEXT: i32.extend8_s $push59=, $6 -; NO-SIMD128-NEXT: i32.extend8_s $push58=, $22 -; NO-SIMD128-NEXT: i32.gt_s $push60=, $pop59, $pop58 -; NO-SIMD128-NEXT: i32.select $push61=, $6, $22, $pop60 -; NO-SIMD128-NEXT: i32.store8 0($pop63), $pop61 -; NO-SIMD128-NEXT: i32.extend8_s $push65=, $5 -; NO-SIMD128-NEXT: i32.extend8_s $push64=, $21 -; NO-SIMD128-NEXT: i32.gt_s $push66=, $pop65, $pop64 -; NO-SIMD128-NEXT: i32.select $push67=, $5, $21, $pop66 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop67 -; NO-SIMD128-NEXT: i32.const $push72=, 3 -; NO-SIMD128-NEXT: i32.add $push73=, $0, $pop72 -; NO-SIMD128-NEXT: i32.extend8_s $push69=, $4 -; NO-SIMD128-NEXT: i32.extend8_s $push68=, $20 -; NO-SIMD128-NEXT: i32.gt_s $push70=, $pop69, $pop68 -; NO-SIMD128-NEXT: i32.select $push71=, $4, $20, $pop70 -; NO-SIMD128-NEXT: i32.store8 0($pop73), $pop71 -; NO-SIMD128-NEXT: i32.extend8_s $push75=, $3 -; NO-SIMD128-NEXT: i32.extend8_s $push74=, $19 -; NO-SIMD128-NEXT: i32.gt_s $push76=, $pop75, $pop74 -; NO-SIMD128-NEXT: i32.select $push77=, $3, $19, $pop76 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop77 -; NO-SIMD128-NEXT: i32.extend8_s $push79=, $2 -; NO-SIMD128-NEXT: i32.extend8_s $push78=, $18 -; NO-SIMD128-NEXT: i32.gt_s $push80=, $pop79, 
$pop78 -; NO-SIMD128-NEXT: i32.select $push81=, $2, $18, $pop80 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop81 -; NO-SIMD128-NEXT: i32.extend8_s $push83=, $1 -; NO-SIMD128-NEXT: i32.extend8_s $push82=, $17 -; NO-SIMD128-NEXT: i32.gt_s $push84=, $pop83, $pop82 -; NO-SIMD128-NEXT: i32.select $push85=, $1, $17, $pop84 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop85 +; NO-SIMD128-NEXT: i32.select $push55=, $3, $19, $pop54 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop55 +; NO-SIMD128-NEXT: i32.extend8_s $push57=, $2 +; NO-SIMD128-NEXT: i32.extend8_s $push56=, $18 +; NO-SIMD128-NEXT: i32.gt_s $push58=, $pop57, $pop56 +; NO-SIMD128-NEXT: i32.select $push59=, $2, $18, $pop58 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop59 +; NO-SIMD128-NEXT: i32.extend8_s $push61=, $1 +; NO-SIMD128-NEXT: i32.extend8_s $push60=, $17 +; NO-SIMD128-NEXT: i32.gt_s $push62=, $pop61, $pop60 +; NO-SIMD128-NEXT: i32.select $push63=, $1, $17, $pop62 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop63 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_s_v16i8: @@ -1207,93 +965,71 @@ define <16 x i8> @max_s_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: i32.gt_s $push10=, $pop9, $pop8 ; NO-SIMD128-FAST-NEXT: i32.select $push11=, $3, $19, $pop10 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push16=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $4 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $20 ; NO-SIMD128-FAST-NEXT: i32.gt_s $push14=, $pop13, $pop12 ; NO-SIMD128-FAST-NEXT: i32.select $push15=, $4, $20, $pop14 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop17), $pop15 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $5 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push18=, $21 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push20=, $pop19, $pop18 -; NO-SIMD128-FAST-NEXT: i32.select $push21=, $5, $21, $pop20 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 5 -; 
NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push23=, $6 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push22=, $22 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push24=, $pop23, $pop22 -; NO-SIMD128-FAST-NEXT: i32.select $push25=, $6, $22, $pop24 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $7 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push28=, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push17=, $5 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push16=, $21 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push18=, $pop17, $pop16 +; NO-SIMD128-FAST-NEXT: i32.select $push19=, $5, $21, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop19 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push21=, $6 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push20=, $22 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push22=, $pop21, $pop20 +; NO-SIMD128-FAST-NEXT: i32.select $push23=, $6, $22, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop23 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $7 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push24=, $23 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push26=, $pop25, $pop24 +; NO-SIMD128-FAST-NEXT: i32.select $push27=, $7, $23, $pop26 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop27 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $8 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push28=, $24 ; NO-SIMD128-FAST-NEXT: i32.gt_s $push30=, $pop29, $pop28 -; NO-SIMD128-FAST-NEXT: i32.select $push31=, $7, $23, $pop30 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push38=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push39=, $0, $pop38 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push35=, $8 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push34=, $24 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push36=, $pop35, $pop34 -; NO-SIMD128-FAST-NEXT: i32.select 
$push37=, $8, $24, $pop36 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop39), $pop37 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push41=, $9 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push40=, $25 +; NO-SIMD128-FAST-NEXT: i32.select $push31=, $8, $24, $pop30 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop31 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push33=, $9 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push32=, $25 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push34=, $pop33, $pop32 +; NO-SIMD128-FAST-NEXT: i32.select $push35=, $9, $25, $pop34 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop35 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push37=, $10 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push36=, $26 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push38=, $pop37, $pop36 +; NO-SIMD128-FAST-NEXT: i32.select $push39=, $10, $26, $pop38 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop39 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push41=, $11 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push40=, $27 ; NO-SIMD128-FAST-NEXT: i32.gt_s $push42=, $pop41, $pop40 -; NO-SIMD128-FAST-NEXT: i32.select $push43=, $9, $25, $pop42 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop43 -; NO-SIMD128-FAST-NEXT: i32.const $push48=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push49=, $0, $pop48 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push45=, $10 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push44=, $26 +; NO-SIMD128-FAST-NEXT: i32.select $push43=, $11, $27, $pop42 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop43 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push45=, $12 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push44=, $28 ; NO-SIMD128-FAST-NEXT: i32.gt_s $push46=, $pop45, $pop44 -; NO-SIMD128-FAST-NEXT: i32.select $push47=, $10, $26, $pop46 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop49), $pop47 -; NO-SIMD128-FAST-NEXT: i32.const $push54=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push55=, $0, $pop54 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push51=, $11 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push50=, $27 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push52=, $pop51, $pop50 -; 
NO-SIMD128-FAST-NEXT: i32.select $push53=, $11, $27, $pop52 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop55), $pop53 -; NO-SIMD128-FAST-NEXT: i32.const $push60=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push61=, $0, $pop60 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push57=, $12 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push56=, $28 +; NO-SIMD128-FAST-NEXT: i32.select $push47=, $12, $28, $pop46 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop47 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push49=, $13 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push48=, $29 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push50=, $pop49, $pop48 +; NO-SIMD128-FAST-NEXT: i32.select $push51=, $13, $29, $pop50 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop51 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push53=, $14 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push52=, $30 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push54=, $pop53, $pop52 +; NO-SIMD128-FAST-NEXT: i32.select $push55=, $14, $30, $pop54 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop55 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push57=, $15 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push56=, $31 ; NO-SIMD128-FAST-NEXT: i32.gt_s $push58=, $pop57, $pop56 -; NO-SIMD128-FAST-NEXT: i32.select $push59=, $12, $28, $pop58 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop61), $pop59 -; NO-SIMD128-FAST-NEXT: i32.const $push66=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push67=, $0, $pop66 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push63=, $13 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push62=, $29 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push64=, $pop63, $pop62 -; NO-SIMD128-FAST-NEXT: i32.select $push65=, $13, $29, $pop64 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop67), $pop65 -; NO-SIMD128-FAST-NEXT: i32.const $push72=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push73=, $0, $pop72 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push69=, $14 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push68=, $30 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push70=, $pop69, $pop68 -; NO-SIMD128-FAST-NEXT: i32.select $push71=, $14, $30, $pop70 -; 
NO-SIMD128-FAST-NEXT: i32.store8 0($pop73), $pop71 -; NO-SIMD128-FAST-NEXT: i32.const $push78=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push79=, $0, $pop78 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push75=, $15 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push74=, $31 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push76=, $pop75, $pop74 -; NO-SIMD128-FAST-NEXT: i32.select $push77=, $15, $31, $pop76 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop79), $pop77 -; NO-SIMD128-FAST-NEXT: i32.const $push84=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push85=, $0, $pop84 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push81=, $16 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push80=, $32 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push82=, $pop81, $pop80 -; NO-SIMD128-FAST-NEXT: i32.select $push83=, $16, $32, $pop82 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop85), $pop83 +; NO-SIMD128-FAST-NEXT: i32.select $push59=, $15, $31, $pop58 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop59 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push61=, $16 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push60=, $32 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push62=, $pop61, $pop60 +; NO-SIMD128-FAST-NEXT: i32.select $push63=, $16, $32, $pop62 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop63 ; NO-SIMD128-FAST-NEXT: return %c = icmp sgt <16 x i8> %x, %y %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y @@ -1316,140 +1052,118 @@ define <16 x i8> @max_u_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: max_u_v16i8: ; NO-SIMD128: .functype max_u_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 15 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.const $push0=, 255 ; NO-SIMD128-NEXT: i32.and $push2=, $16, $pop0 -; NO-SIMD128-NEXT: i32.const $push117=, 255 -; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop117 +; NO-SIMD128-NEXT: i32.const $push95=, 255 
+; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop95 ; NO-SIMD128-NEXT: i32.gt_u $push3=, $pop2, $pop1 ; NO-SIMD128-NEXT: i32.select $push4=, $16, $32, $pop3 -; NO-SIMD128-NEXT: i32.store8 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push11=, 14 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.const $push116=, 255 -; NO-SIMD128-NEXT: i32.and $push8=, $15, $pop116 -; NO-SIMD128-NEXT: i32.const $push115=, 255 -; NO-SIMD128-NEXT: i32.and $push7=, $31, $pop115 -; NO-SIMD128-NEXT: i32.gt_u $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.select $push10=, $15, $31, $pop9 -; NO-SIMD128-NEXT: i32.store8 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push17=, 13 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.const $push114=, 255 -; NO-SIMD128-NEXT: i32.and $push14=, $14, $pop114 -; NO-SIMD128-NEXT: i32.const $push113=, 255 -; NO-SIMD128-NEXT: i32.and $push13=, $30, $pop113 -; NO-SIMD128-NEXT: i32.gt_u $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.select $push16=, $14, $30, $pop15 -; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push23=, 12 -; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-NEXT: i32.const $push112=, 255 -; NO-SIMD128-NEXT: i32.and $push20=, $13, $pop112 -; NO-SIMD128-NEXT: i32.const $push111=, 255 -; NO-SIMD128-NEXT: i32.and $push19=, $29, $pop111 -; NO-SIMD128-NEXT: i32.gt_u $push21=, $pop20, $pop19 -; NO-SIMD128-NEXT: i32.select $push22=, $13, $29, $pop21 -; NO-SIMD128-NEXT: i32.store8 0($pop24), $pop22 -; NO-SIMD128-NEXT: i32.const $push29=, 11 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.const $push110=, 255 -; NO-SIMD128-NEXT: i32.and $push26=, $12, $pop110 -; NO-SIMD128-NEXT: i32.const $push109=, 255 -; NO-SIMD128-NEXT: i32.and $push25=, $28, $pop109 -; NO-SIMD128-NEXT: i32.gt_u $push27=, $pop26, $pop25 -; NO-SIMD128-NEXT: i32.select $push28=, $12, $28, $pop27 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: 
i32.const $push35=, 10 -; NO-SIMD128-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-NEXT: i32.const $push108=, 255 -; NO-SIMD128-NEXT: i32.and $push32=, $11, $pop108 -; NO-SIMD128-NEXT: i32.const $push107=, 255 -; NO-SIMD128-NEXT: i32.and $push31=, $27, $pop107 -; NO-SIMD128-NEXT: i32.gt_u $push33=, $pop32, $pop31 -; NO-SIMD128-NEXT: i32.select $push34=, $11, $27, $pop33 -; NO-SIMD128-NEXT: i32.store8 0($pop36), $pop34 -; NO-SIMD128-NEXT: i32.const $push41=, 9 -; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-NEXT: i32.const $push106=, 255 -; NO-SIMD128-NEXT: i32.and $push38=, $10, $pop106 -; NO-SIMD128-NEXT: i32.const $push105=, 255 -; NO-SIMD128-NEXT: i32.and $push37=, $26, $pop105 -; NO-SIMD128-NEXT: i32.gt_u $push39=, $pop38, $pop37 -; NO-SIMD128-NEXT: i32.select $push40=, $10, $26, $pop39 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-NEXT: i32.const $push104=, 255 -; NO-SIMD128-NEXT: i32.and $push44=, $9, $pop104 -; NO-SIMD128-NEXT: i32.const $push103=, 255 -; NO-SIMD128-NEXT: i32.and $push43=, $25, $pop103 -; NO-SIMD128-NEXT: i32.gt_u $push45=, $pop44, $pop43 -; NO-SIMD128-NEXT: i32.select $push46=, $9, $25, $pop45 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop46 -; NO-SIMD128-NEXT: i32.const $push51=, 7 -; NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51 -; NO-SIMD128-NEXT: i32.const $push102=, 255 -; NO-SIMD128-NEXT: i32.and $push48=, $8, $pop102 -; NO-SIMD128-NEXT: i32.const $push101=, 255 -; NO-SIMD128-NEXT: i32.and $push47=, $24, $pop101 -; NO-SIMD128-NEXT: i32.gt_u $push49=, $pop48, $pop47 -; NO-SIMD128-NEXT: i32.select $push50=, $8, $24, $pop49 -; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50 -; NO-SIMD128-NEXT: i32.const $push57=, 6 -; NO-SIMD128-NEXT: i32.add $push58=, $0, $pop57 -; NO-SIMD128-NEXT: i32.const $push100=, 255 -; NO-SIMD128-NEXT: i32.and $push54=, $7, $pop100 -; NO-SIMD128-NEXT: i32.const $push99=, 255 -; NO-SIMD128-NEXT: i32.and $push53=, $23, $pop99 -; NO-SIMD128-NEXT: i32.gt_u $push55=, $pop54, $pop53 -; 
NO-SIMD128-NEXT: i32.select $push56=, $7, $23, $pop55 -; NO-SIMD128-NEXT: i32.store8 0($pop58), $pop56 -; NO-SIMD128-NEXT: i32.const $push63=, 5 -; NO-SIMD128-NEXT: i32.add $push64=, $0, $pop63 -; NO-SIMD128-NEXT: i32.const $push98=, 255 -; NO-SIMD128-NEXT: i32.and $push60=, $6, $pop98 -; NO-SIMD128-NEXT: i32.const $push97=, 255 -; NO-SIMD128-NEXT: i32.and $push59=, $22, $pop97 -; NO-SIMD128-NEXT: i32.gt_u $push61=, $pop60, $pop59 -; NO-SIMD128-NEXT: i32.select $push62=, $6, $22, $pop61 -; NO-SIMD128-NEXT: i32.store8 0($pop64), $pop62 -; NO-SIMD128-NEXT: i32.const $push96=, 255 -; NO-SIMD128-NEXT: i32.and $push66=, $5, $pop96 -; NO-SIMD128-NEXT: i32.const $push95=, 255 -; NO-SIMD128-NEXT: i32.and $push65=, $21, $pop95 -; NO-SIMD128-NEXT: i32.gt_u $push67=, $pop66, $pop65 -; NO-SIMD128-NEXT: i32.select $push68=, $5, $21, $pop67 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop68 -; NO-SIMD128-NEXT: i32.const $push73=, 3 -; NO-SIMD128-NEXT: i32.add $push74=, $0, $pop73 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop4 ; NO-SIMD128-NEXT: i32.const $push94=, 255 -; NO-SIMD128-NEXT: i32.and $push70=, $4, $pop94 +; NO-SIMD128-NEXT: i32.and $push6=, $15, $pop94 ; NO-SIMD128-NEXT: i32.const $push93=, 255 -; NO-SIMD128-NEXT: i32.and $push69=, $20, $pop93 -; NO-SIMD128-NEXT: i32.gt_u $push71=, $pop70, $pop69 -; NO-SIMD128-NEXT: i32.select $push72=, $4, $20, $pop71 -; NO-SIMD128-NEXT: i32.store8 0($pop74), $pop72 +; NO-SIMD128-NEXT: i32.and $push5=, $31, $pop93 +; NO-SIMD128-NEXT: i32.gt_u $push7=, $pop6, $pop5 +; NO-SIMD128-NEXT: i32.select $push8=, $15, $31, $pop7 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop8 ; NO-SIMD128-NEXT: i32.const $push92=, 255 -; NO-SIMD128-NEXT: i32.and $push76=, $3, $pop92 +; NO-SIMD128-NEXT: i32.and $push10=, $14, $pop92 ; NO-SIMD128-NEXT: i32.const $push91=, 255 -; NO-SIMD128-NEXT: i32.and $push75=, $19, $pop91 -; NO-SIMD128-NEXT: i32.gt_u $push77=, $pop76, $pop75 -; NO-SIMD128-NEXT: i32.select $push78=, $3, $19, $pop77 -; NO-SIMD128-NEXT: i32.store8 2($0), 
$pop78 +; NO-SIMD128-NEXT: i32.and $push9=, $30, $pop91 +; NO-SIMD128-NEXT: i32.gt_u $push11=, $pop10, $pop9 +; NO-SIMD128-NEXT: i32.select $push12=, $14, $30, $pop11 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop12 ; NO-SIMD128-NEXT: i32.const $push90=, 255 -; NO-SIMD128-NEXT: i32.and $push80=, $2, $pop90 +; NO-SIMD128-NEXT: i32.and $push14=, $13, $pop90 ; NO-SIMD128-NEXT: i32.const $push89=, 255 -; NO-SIMD128-NEXT: i32.and $push79=, $18, $pop89 -; NO-SIMD128-NEXT: i32.gt_u $push81=, $pop80, $pop79 -; NO-SIMD128-NEXT: i32.select $push82=, $2, $18, $pop81 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop82 +; NO-SIMD128-NEXT: i32.and $push13=, $29, $pop89 +; NO-SIMD128-NEXT: i32.gt_u $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.select $push16=, $13, $29, $pop15 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop16 ; NO-SIMD128-NEXT: i32.const $push88=, 255 -; NO-SIMD128-NEXT: i32.and $push84=, $1, $pop88 +; NO-SIMD128-NEXT: i32.and $push18=, $12, $pop88 ; NO-SIMD128-NEXT: i32.const $push87=, 255 -; NO-SIMD128-NEXT: i32.and $push83=, $17, $pop87 -; NO-SIMD128-NEXT: i32.gt_u $push85=, $pop84, $pop83 -; NO-SIMD128-NEXT: i32.select $push86=, $1, $17, $pop85 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop86 +; NO-SIMD128-NEXT: i32.and $push17=, $28, $pop87 +; NO-SIMD128-NEXT: i32.gt_u $push19=, $pop18, $pop17 +; NO-SIMD128-NEXT: i32.select $push20=, $12, $28, $pop19 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop20 +; NO-SIMD128-NEXT: i32.const $push86=, 255 +; NO-SIMD128-NEXT: i32.and $push22=, $11, $pop86 +; NO-SIMD128-NEXT: i32.const $push85=, 255 +; NO-SIMD128-NEXT: i32.and $push21=, $27, $pop85 +; NO-SIMD128-NEXT: i32.gt_u $push23=, $pop22, $pop21 +; NO-SIMD128-NEXT: i32.select $push24=, $11, $27, $pop23 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop24 +; NO-SIMD128-NEXT: i32.const $push84=, 255 +; NO-SIMD128-NEXT: i32.and $push26=, $10, $pop84 +; NO-SIMD128-NEXT: i32.const $push83=, 255 +; NO-SIMD128-NEXT: i32.and $push25=, $26, $pop83 +; NO-SIMD128-NEXT: i32.gt_u $push27=, $pop26, $pop25 
+; NO-SIMD128-NEXT: i32.select $push28=, $10, $26, $pop27 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop28 +; NO-SIMD128-NEXT: i32.const $push82=, 255 +; NO-SIMD128-NEXT: i32.and $push30=, $9, $pop82 +; NO-SIMD128-NEXT: i32.const $push81=, 255 +; NO-SIMD128-NEXT: i32.and $push29=, $25, $pop81 +; NO-SIMD128-NEXT: i32.gt_u $push31=, $pop30, $pop29 +; NO-SIMD128-NEXT: i32.select $push32=, $9, $25, $pop31 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop32 +; NO-SIMD128-NEXT: i32.const $push80=, 255 +; NO-SIMD128-NEXT: i32.and $push34=, $8, $pop80 +; NO-SIMD128-NEXT: i32.const $push79=, 255 +; NO-SIMD128-NEXT: i32.and $push33=, $24, $pop79 +; NO-SIMD128-NEXT: i32.gt_u $push35=, $pop34, $pop33 +; NO-SIMD128-NEXT: i32.select $push36=, $8, $24, $pop35 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop36 +; NO-SIMD128-NEXT: i32.const $push78=, 255 +; NO-SIMD128-NEXT: i32.and $push38=, $7, $pop78 +; NO-SIMD128-NEXT: i32.const $push77=, 255 +; NO-SIMD128-NEXT: i32.and $push37=, $23, $pop77 +; NO-SIMD128-NEXT: i32.gt_u $push39=, $pop38, $pop37 +; NO-SIMD128-NEXT: i32.select $push40=, $7, $23, $pop39 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop40 +; NO-SIMD128-NEXT: i32.const $push76=, 255 +; NO-SIMD128-NEXT: i32.and $push42=, $6, $pop76 +; NO-SIMD128-NEXT: i32.const $push75=, 255 +; NO-SIMD128-NEXT: i32.and $push41=, $22, $pop75 +; NO-SIMD128-NEXT: i32.gt_u $push43=, $pop42, $pop41 +; NO-SIMD128-NEXT: i32.select $push44=, $6, $22, $pop43 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop44 +; NO-SIMD128-NEXT: i32.const $push74=, 255 +; NO-SIMD128-NEXT: i32.and $push46=, $5, $pop74 +; NO-SIMD128-NEXT: i32.const $push73=, 255 +; NO-SIMD128-NEXT: i32.and $push45=, $21, $pop73 +; NO-SIMD128-NEXT: i32.gt_u $push47=, $pop46, $pop45 +; NO-SIMD128-NEXT: i32.select $push48=, $5, $21, $pop47 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop48 +; NO-SIMD128-NEXT: i32.const $push72=, 255 +; NO-SIMD128-NEXT: i32.and $push50=, $4, $pop72 +; NO-SIMD128-NEXT: i32.const $push71=, 255 +; NO-SIMD128-NEXT: i32.and $push49=, $20, 
$pop71 +; NO-SIMD128-NEXT: i32.gt_u $push51=, $pop50, $pop49 +; NO-SIMD128-NEXT: i32.select $push52=, $4, $20, $pop51 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop52 +; NO-SIMD128-NEXT: i32.const $push70=, 255 +; NO-SIMD128-NEXT: i32.and $push54=, $3, $pop70 +; NO-SIMD128-NEXT: i32.const $push69=, 255 +; NO-SIMD128-NEXT: i32.and $push53=, $19, $pop69 +; NO-SIMD128-NEXT: i32.gt_u $push55=, $pop54, $pop53 +; NO-SIMD128-NEXT: i32.select $push56=, $3, $19, $pop55 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop56 +; NO-SIMD128-NEXT: i32.const $push68=, 255 +; NO-SIMD128-NEXT: i32.and $push58=, $2, $pop68 +; NO-SIMD128-NEXT: i32.const $push67=, 255 +; NO-SIMD128-NEXT: i32.and $push57=, $18, $pop67 +; NO-SIMD128-NEXT: i32.gt_u $push59=, $pop58, $pop57 +; NO-SIMD128-NEXT: i32.select $push60=, $2, $18, $pop59 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop60 +; NO-SIMD128-NEXT: i32.const $push66=, 255 +; NO-SIMD128-NEXT: i32.and $push62=, $1, $pop66 +; NO-SIMD128-NEXT: i32.const $push65=, 255 +; NO-SIMD128-NEXT: i32.and $push61=, $17, $pop65 +; NO-SIMD128-NEXT: i32.gt_u $push63=, $pop62, $pop61 +; NO-SIMD128-NEXT: i32.select $push64=, $1, $17, $pop63 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop64 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_u_v16i8: @@ -1457,138 +1171,116 @@ define <16 x i8> @max_u_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push117=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop117 +; NO-SIMD128-FAST-NEXT: i32.const $push95=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop95 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.select $push4=, $1, $17, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push116=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop116 -; NO-SIMD128-FAST-NEXT: i32.const $push115=, 255 
-; NO-SIMD128-FAST-NEXT: i32.and $push5=, $18, $pop115 +; NO-SIMD128-FAST-NEXT: i32.const $push94=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop94 +; NO-SIMD128-FAST-NEXT: i32.const $push93=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $18, $pop93 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push7=, $pop6, $pop5 ; NO-SIMD128-FAST-NEXT: i32.select $push8=, $2, $18, $pop7 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push114=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop114 -; NO-SIMD128-FAST-NEXT: i32.const $push113=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push9=, $19, $pop113 +; NO-SIMD128-FAST-NEXT: i32.const $push92=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop92 +; NO-SIMD128-FAST-NEXT: i32.const $push91=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $19, $pop91 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push11=, $pop10, $pop9 ; NO-SIMD128-FAST-NEXT: i32.select $push12=, $3, $19, $pop11 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push112=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop112 -; NO-SIMD128-FAST-NEXT: i32.const $push111=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push13=, $20, $pop111 +; NO-SIMD128-FAST-NEXT: i32.const $push90=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop90 +; NO-SIMD128-FAST-NEXT: i32.const $push89=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $20, $pop89 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push15=, $pop14, $pop13 ; NO-SIMD128-FAST-NEXT: i32.select $push16=, $4, $20, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push110=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $5, $pop110 -; NO-SIMD128-FAST-NEXT: i32.const $push109=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $21, $pop109 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push21=, $pop20, $pop19 -; NO-SIMD128-FAST-NEXT: 
i32.select $push22=, $5, $21, $pop21 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.const $push108=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push24=, $6, $pop108 -; NO-SIMD128-FAST-NEXT: i32.const $push107=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $22, $pop107 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push25=, $pop24, $pop23 -; NO-SIMD128-FAST-NEXT: i32.select $push26=, $6, $22, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.const $push106=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push30=, $7, $pop106 -; NO-SIMD128-FAST-NEXT: i32.const $push105=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $23, $pop105 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push88=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $5, $pop88 +; NO-SIMD128-FAST-NEXT: i32.const $push87=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $21, $pop87 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push19=, $pop18, $pop17 +; NO-SIMD128-FAST-NEXT: i32.select $push20=, $5, $21, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push86=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $6, $pop86 +; NO-SIMD128-FAST-NEXT: i32.const $push85=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $22, $pop85 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push23=, $pop22, $pop21 +; NO-SIMD128-FAST-NEXT: i32.select $push24=, $6, $22, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push84=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push26=, $7, $pop84 +; NO-SIMD128-FAST-NEXT: i32.const $push83=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $23, $pop83 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push27=, $pop26, $pop25 +; NO-SIMD128-FAST-NEXT: i32.select 
$push28=, $7, $23, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push82=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push30=, $8, $pop82 +; NO-SIMD128-FAST-NEXT: i32.const $push81=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $24, $pop81 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push31=, $pop30, $pop29 -; NO-SIMD128-FAST-NEXT: i32.select $push32=, $7, $23, $pop31 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.const $push104=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push36=, $8, $pop104 -; NO-SIMD128-FAST-NEXT: i32.const $push103=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push35=, $24, $pop103 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push37=, $pop36, $pop35 -; NO-SIMD128-FAST-NEXT: i32.select $push38=, $8, $24, $pop37 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop38 -; NO-SIMD128-FAST-NEXT: i32.const $push102=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push42=, $9, $pop102 -; NO-SIMD128-FAST-NEXT: i32.const $push101=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push41=, $25, $pop101 +; NO-SIMD128-FAST-NEXT: i32.select $push32=, $8, $24, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push80=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push34=, $9, $pop80 +; NO-SIMD128-FAST-NEXT: i32.const $push79=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push33=, $25, $pop79 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push35=, $pop34, $pop33 +; NO-SIMD128-FAST-NEXT: i32.select $push36=, $9, $25, $pop35 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push78=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push38=, $10, $pop78 +; NO-SIMD128-FAST-NEXT: i32.const $push77=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $26, $pop77 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push39=, $pop38, $pop37 +; NO-SIMD128-FAST-NEXT: i32.select $push40=, $10, $26, $pop39 +; NO-SIMD128-FAST-NEXT: 
i32.store8 9($0), $pop40 +; NO-SIMD128-FAST-NEXT: i32.const $push76=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push42=, $11, $pop76 +; NO-SIMD128-FAST-NEXT: i32.const $push75=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push41=, $27, $pop75 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push43=, $pop42, $pop41 -; NO-SIMD128-FAST-NEXT: i32.select $push44=, $9, $25, $pop43 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-FAST-NEXT: i32.const $push100=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push46=, $10, $pop100 -; NO-SIMD128-FAST-NEXT: i32.const $push99=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push45=, $26, $pop99 +; NO-SIMD128-FAST-NEXT: i32.select $push44=, $11, $27, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push74=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push46=, $12, $pop74 +; NO-SIMD128-FAST-NEXT: i32.const $push73=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push45=, $28, $pop73 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push47=, $pop46, $pop45 -; NO-SIMD128-FAST-NEXT: i32.select $push48=, $10, $26, $pop47 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push56=, $0, $pop55 -; NO-SIMD128-FAST-NEXT: i32.const $push98=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push52=, $11, $pop98 -; NO-SIMD128-FAST-NEXT: i32.const $push97=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push51=, $27, $pop97 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push53=, $pop52, $pop51 -; NO-SIMD128-FAST-NEXT: i32.select $push54=, $11, $27, $pop53 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop56), $pop54 -; NO-SIMD128-FAST-NEXT: i32.const $push61=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push62=, $0, $pop61 -; NO-SIMD128-FAST-NEXT: i32.const $push96=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push58=, $12, $pop96 -; NO-SIMD128-FAST-NEXT: i32.const $push95=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push57=, $28, 
$pop95 +; NO-SIMD128-FAST-NEXT: i32.select $push48=, $12, $28, $pop47 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop48 +; NO-SIMD128-FAST-NEXT: i32.const $push72=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push50=, $13, $pop72 +; NO-SIMD128-FAST-NEXT: i32.const $push71=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push49=, $29, $pop71 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push51=, $pop50, $pop49 +; NO-SIMD128-FAST-NEXT: i32.select $push52=, $13, $29, $pop51 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop52 +; NO-SIMD128-FAST-NEXT: i32.const $push70=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push54=, $14, $pop70 +; NO-SIMD128-FAST-NEXT: i32.const $push69=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push53=, $30, $pop69 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push55=, $pop54, $pop53 +; NO-SIMD128-FAST-NEXT: i32.select $push56=, $14, $30, $pop55 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop56 +; NO-SIMD128-FAST-NEXT: i32.const $push68=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push58=, $15, $pop68 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push57=, $31, $pop67 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push59=, $pop58, $pop57 -; NO-SIMD128-FAST-NEXT: i32.select $push60=, $12, $28, $pop59 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop62), $pop60 -; NO-SIMD128-FAST-NEXT: i32.const $push67=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push68=, $0, $pop67 -; NO-SIMD128-FAST-NEXT: i32.const $push94=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push64=, $13, $pop94 -; NO-SIMD128-FAST-NEXT: i32.const $push93=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push63=, $29, $pop93 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push65=, $pop64, $pop63 -; NO-SIMD128-FAST-NEXT: i32.select $push66=, $13, $29, $pop65 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop68), $pop66 -; NO-SIMD128-FAST-NEXT: i32.const $push73=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push74=, $0, $pop73 -; NO-SIMD128-FAST-NEXT: i32.const $push92=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push70=, $14, $pop92 -; NO-SIMD128-FAST-NEXT: i32.const 
$push91=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push69=, $30, $pop91 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push71=, $pop70, $pop69 -; NO-SIMD128-FAST-NEXT: i32.select $push72=, $14, $30, $pop71 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop74), $pop72 -; NO-SIMD128-FAST-NEXT: i32.const $push79=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push80=, $0, $pop79 -; NO-SIMD128-FAST-NEXT: i32.const $push90=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push76=, $15, $pop90 -; NO-SIMD128-FAST-NEXT: i32.const $push89=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push75=, $31, $pop89 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push77=, $pop76, $pop75 -; NO-SIMD128-FAST-NEXT: i32.select $push78=, $15, $31, $pop77 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop80), $pop78 -; NO-SIMD128-FAST-NEXT: i32.const $push85=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push86=, $0, $pop85 -; NO-SIMD128-FAST-NEXT: i32.const $push88=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push82=, $16, $pop88 -; NO-SIMD128-FAST-NEXT: i32.const $push87=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push81=, $32, $pop87 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push83=, $pop82, $pop81 -; NO-SIMD128-FAST-NEXT: i32.select $push84=, $16, $32, $pop83 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop86), $pop84 +; NO-SIMD128-FAST-NEXT: i32.select $push60=, $15, $31, $pop59 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop60 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push62=, $16, $pop66 +; NO-SIMD128-FAST-NEXT: i32.const $push65=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push61=, $32, $pop65 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push63=, $pop62, $pop61 +; NO-SIMD128-FAST-NEXT: i32.select $push64=, $16, $32, $pop63 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop64 ; NO-SIMD128-FAST-NEXT: return %c = icmp ugt <16 x i8> %x, %y %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y @@ -1611,156 +1303,134 @@ define <16 x i8> @avgr_u_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: avgr_u_v16i8: ; NO-SIMD128: .functype avgr_u_v16i8 (i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push0=, 15 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.add $push2=, $16, $32 -; NO-SIMD128-NEXT: i32.const $push3=, 1 -; NO-SIMD128-NEXT: i32.add $push4=, $pop2, $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 254 -; NO-SIMD128-NEXT: i32.and $push6=, $pop4, $pop5 -; NO-SIMD128-NEXT: i32.const $push133=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push7=, $pop6, $pop133 -; NO-SIMD128-NEXT: i32.store8 0($pop1), $pop7 -; NO-SIMD128-NEXT: i32.const $push8=, 14 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.add $push10=, $15, $31 -; NO-SIMD128-NEXT: i32.const $push132=, 1 -; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop132 -; NO-SIMD128-NEXT: i32.const $push131=, 254 -; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop131 -; NO-SIMD128-NEXT: i32.const $push130=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop130 -; NO-SIMD128-NEXT: i32.store8 0($pop9), $pop13 -; NO-SIMD128-NEXT: i32.const $push14=, 13 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.add $push16=, $14, $30 -; NO-SIMD128-NEXT: i32.const $push129=, 1 -; NO-SIMD128-NEXT: i32.add $push17=, $pop16, $pop129 -; NO-SIMD128-NEXT: i32.const $push128=, 254 -; NO-SIMD128-NEXT: i32.and $push18=, $pop17, $pop128 -; NO-SIMD128-NEXT: i32.const $push127=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push19=, $pop18, $pop127 -; NO-SIMD128-NEXT: i32.store8 0($pop15), $pop19 -; NO-SIMD128-NEXT: i32.const $push20=, 12 -; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-NEXT: i32.add $push22=, $13, $29 -; NO-SIMD128-NEXT: i32.const $push126=, 1 -; NO-SIMD128-NEXT: i32.add $push23=, $pop22, $pop126 -; NO-SIMD128-NEXT: i32.const $push125=, 254 -; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $pop125 -; NO-SIMD128-NEXT: i32.const $push124=, 1 -; 
NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop124 -; NO-SIMD128-NEXT: i32.store8 0($pop21), $pop25 -; NO-SIMD128-NEXT: i32.const $push26=, 11 -; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-NEXT: i32.add $push28=, $12, $28 -; NO-SIMD128-NEXT: i32.const $push123=, 1 -; NO-SIMD128-NEXT: i32.add $push29=, $pop28, $pop123 -; NO-SIMD128-NEXT: i32.const $push122=, 254 -; NO-SIMD128-NEXT: i32.and $push30=, $pop29, $pop122 -; NO-SIMD128-NEXT: i32.const $push121=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push31=, $pop30, $pop121 -; NO-SIMD128-NEXT: i32.store8 0($pop27), $pop31 -; NO-SIMD128-NEXT: i32.const $push32=, 10 -; NO-SIMD128-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-NEXT: i32.add $push34=, $11, $27 -; NO-SIMD128-NEXT: i32.const $push120=, 1 -; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop120 -; NO-SIMD128-NEXT: i32.const $push119=, 254 -; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $pop119 -; NO-SIMD128-NEXT: i32.const $push118=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push37=, $pop36, $pop118 -; NO-SIMD128-NEXT: i32.store8 0($pop33), $pop37 -; NO-SIMD128-NEXT: i32.const $push38=, 9 -; NO-SIMD128-NEXT: i32.add $push39=, $0, $pop38 -; NO-SIMD128-NEXT: i32.add $push40=, $10, $26 -; NO-SIMD128-NEXT: i32.const $push117=, 1 -; NO-SIMD128-NEXT: i32.add $push41=, $pop40, $pop117 -; NO-SIMD128-NEXT: i32.const $push116=, 254 -; NO-SIMD128-NEXT: i32.and $push42=, $pop41, $pop116 -; NO-SIMD128-NEXT: i32.const $push115=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push43=, $pop42, $pop115 -; NO-SIMD128-NEXT: i32.store8 0($pop39), $pop43 -; NO-SIMD128-NEXT: i32.add $push44=, $9, $25 -; NO-SIMD128-NEXT: i32.const $push114=, 1 -; NO-SIMD128-NEXT: i32.add $push45=, $pop44, $pop114 -; NO-SIMD128-NEXT: i32.const $push113=, 254 -; NO-SIMD128-NEXT: i32.and $push46=, $pop45, $pop113 -; NO-SIMD128-NEXT: i32.const $push112=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push47=, $pop46, $pop112 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop47 -; NO-SIMD128-NEXT: i32.const $push48=, 7 -; NO-SIMD128-NEXT: 
i32.add $push49=, $0, $pop48 -; NO-SIMD128-NEXT: i32.add $push50=, $8, $24 +; NO-SIMD128-NEXT: i32.add $push0=, $16, $32 +; NO-SIMD128-NEXT: i32.const $push1=, 1 +; NO-SIMD128-NEXT: i32.add $push2=, $pop0, $pop1 +; NO-SIMD128-NEXT: i32.const $push3=, 254 +; NO-SIMD128-NEXT: i32.and $push4=, $pop2, $pop3 ; NO-SIMD128-NEXT: i32.const $push111=, 1 -; NO-SIMD128-NEXT: i32.add $push51=, $pop50, $pop111 -; NO-SIMD128-NEXT: i32.const $push110=, 254 -; NO-SIMD128-NEXT: i32.and $push52=, $pop51, $pop110 -; NO-SIMD128-NEXT: i32.const $push109=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push53=, $pop52, $pop109 -; NO-SIMD128-NEXT: i32.store8 0($pop49), $pop53 -; NO-SIMD128-NEXT: i32.const $push54=, 6 -; NO-SIMD128-NEXT: i32.add $push55=, $0, $pop54 -; NO-SIMD128-NEXT: i32.add $push56=, $7, $23 +; NO-SIMD128-NEXT: i32.shr_u $push5=, $pop4, $pop111 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop5 +; NO-SIMD128-NEXT: i32.add $push6=, $15, $31 +; NO-SIMD128-NEXT: i32.const $push110=, 1 +; NO-SIMD128-NEXT: i32.add $push7=, $pop6, $pop110 +; NO-SIMD128-NEXT: i32.const $push109=, 254 +; NO-SIMD128-NEXT: i32.and $push8=, $pop7, $pop109 ; NO-SIMD128-NEXT: i32.const $push108=, 1 -; NO-SIMD128-NEXT: i32.add $push57=, $pop56, $pop108 -; NO-SIMD128-NEXT: i32.const $push107=, 254 -; NO-SIMD128-NEXT: i32.and $push58=, $pop57, $pop107 -; NO-SIMD128-NEXT: i32.const $push106=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push59=, $pop58, $pop106 -; NO-SIMD128-NEXT: i32.store8 0($pop55), $pop59 -; NO-SIMD128-NEXT: i32.const $push60=, 5 -; NO-SIMD128-NEXT: i32.add $push61=, $0, $pop60 -; NO-SIMD128-NEXT: i32.add $push62=, $6, $22 +; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop108 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop9 +; NO-SIMD128-NEXT: i32.add $push10=, $14, $30 +; NO-SIMD128-NEXT: i32.const $push107=, 1 +; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop107 +; NO-SIMD128-NEXT: i32.const $push106=, 254 +; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop106 ; NO-SIMD128-NEXT: i32.const $push105=, 1 -; 
NO-SIMD128-NEXT: i32.add $push63=, $pop62, $pop105 -; NO-SIMD128-NEXT: i32.const $push104=, 254 -; NO-SIMD128-NEXT: i32.and $push64=, $pop63, $pop104 -; NO-SIMD128-NEXT: i32.const $push103=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push65=, $pop64, $pop103 -; NO-SIMD128-NEXT: i32.store8 0($pop61), $pop65 -; NO-SIMD128-NEXT: i32.add $push66=, $5, $21 +; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop105 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop13 +; NO-SIMD128-NEXT: i32.add $push14=, $13, $29 +; NO-SIMD128-NEXT: i32.const $push104=, 1 +; NO-SIMD128-NEXT: i32.add $push15=, $pop14, $pop104 +; NO-SIMD128-NEXT: i32.const $push103=, 254 +; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $pop103 ; NO-SIMD128-NEXT: i32.const $push102=, 1 -; NO-SIMD128-NEXT: i32.add $push67=, $pop66, $pop102 -; NO-SIMD128-NEXT: i32.const $push101=, 254 -; NO-SIMD128-NEXT: i32.and $push68=, $pop67, $pop101 -; NO-SIMD128-NEXT: i32.const $push100=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push69=, $pop68, $pop100 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop69 -; NO-SIMD128-NEXT: i32.const $push70=, 3 -; NO-SIMD128-NEXT: i32.add $push71=, $0, $pop70 -; NO-SIMD128-NEXT: i32.add $push72=, $4, $20 +; NO-SIMD128-NEXT: i32.shr_u $push17=, $pop16, $pop102 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop17 +; NO-SIMD128-NEXT: i32.add $push18=, $12, $28 +; NO-SIMD128-NEXT: i32.const $push101=, 1 +; NO-SIMD128-NEXT: i32.add $push19=, $pop18, $pop101 +; NO-SIMD128-NEXT: i32.const $push100=, 254 +; NO-SIMD128-NEXT: i32.and $push20=, $pop19, $pop100 ; NO-SIMD128-NEXT: i32.const $push99=, 1 -; NO-SIMD128-NEXT: i32.add $push73=, $pop72, $pop99 -; NO-SIMD128-NEXT: i32.const $push98=, 254 -; NO-SIMD128-NEXT: i32.and $push74=, $pop73, $pop98 -; NO-SIMD128-NEXT: i32.const $push97=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push75=, $pop74, $pop97 -; NO-SIMD128-NEXT: i32.store8 0($pop71), $pop75 -; NO-SIMD128-NEXT: i32.add $push76=, $3, $19 +; NO-SIMD128-NEXT: i32.shr_u $push21=, $pop20, $pop99 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop21 +; 
NO-SIMD128-NEXT: i32.add $push22=, $11, $27 +; NO-SIMD128-NEXT: i32.const $push98=, 1 +; NO-SIMD128-NEXT: i32.add $push23=, $pop22, $pop98 +; NO-SIMD128-NEXT: i32.const $push97=, 254 +; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $pop97 ; NO-SIMD128-NEXT: i32.const $push96=, 1 -; NO-SIMD128-NEXT: i32.add $push77=, $pop76, $pop96 -; NO-SIMD128-NEXT: i32.const $push95=, 254 -; NO-SIMD128-NEXT: i32.and $push78=, $pop77, $pop95 -; NO-SIMD128-NEXT: i32.const $push94=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push79=, $pop78, $pop94 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop79 -; NO-SIMD128-NEXT: i32.add $push80=, $2, $18 +; NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop96 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop25 +; NO-SIMD128-NEXT: i32.add $push26=, $10, $26 +; NO-SIMD128-NEXT: i32.const $push95=, 1 +; NO-SIMD128-NEXT: i32.add $push27=, $pop26, $pop95 +; NO-SIMD128-NEXT: i32.const $push94=, 254 +; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $pop94 ; NO-SIMD128-NEXT: i32.const $push93=, 1 -; NO-SIMD128-NEXT: i32.add $push81=, $pop80, $pop93 -; NO-SIMD128-NEXT: i32.const $push92=, 254 -; NO-SIMD128-NEXT: i32.and $push82=, $pop81, $pop92 -; NO-SIMD128-NEXT: i32.const $push91=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push83=, $pop82, $pop91 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop83 -; NO-SIMD128-NEXT: i32.add $push84=, $1, $17 +; NO-SIMD128-NEXT: i32.shr_u $push29=, $pop28, $pop93 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop29 +; NO-SIMD128-NEXT: i32.add $push30=, $9, $25 +; NO-SIMD128-NEXT: i32.const $push92=, 1 +; NO-SIMD128-NEXT: i32.add $push31=, $pop30, $pop92 +; NO-SIMD128-NEXT: i32.const $push91=, 254 +; NO-SIMD128-NEXT: i32.and $push32=, $pop31, $pop91 ; NO-SIMD128-NEXT: i32.const $push90=, 1 -; NO-SIMD128-NEXT: i32.add $push85=, $pop84, $pop90 -; NO-SIMD128-NEXT: i32.const $push89=, 254 -; NO-SIMD128-NEXT: i32.and $push86=, $pop85, $pop89 -; NO-SIMD128-NEXT: i32.const $push88=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push87=, $pop86, $pop88 -; NO-SIMD128-NEXT: i32.store8 0($0), 
$pop87 +; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop90 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop33 +; NO-SIMD128-NEXT: i32.add $push34=, $8, $24 +; NO-SIMD128-NEXT: i32.const $push89=, 1 +; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop89 +; NO-SIMD128-NEXT: i32.const $push88=, 254 +; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $pop88 +; NO-SIMD128-NEXT: i32.const $push87=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push37=, $pop36, $pop87 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop37 +; NO-SIMD128-NEXT: i32.add $push38=, $7, $23 +; NO-SIMD128-NEXT: i32.const $push86=, 1 +; NO-SIMD128-NEXT: i32.add $push39=, $pop38, $pop86 +; NO-SIMD128-NEXT: i32.const $push85=, 254 +; NO-SIMD128-NEXT: i32.and $push40=, $pop39, $pop85 +; NO-SIMD128-NEXT: i32.const $push84=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push41=, $pop40, $pop84 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop41 +; NO-SIMD128-NEXT: i32.add $push42=, $6, $22 +; NO-SIMD128-NEXT: i32.const $push83=, 1 +; NO-SIMD128-NEXT: i32.add $push43=, $pop42, $pop83 +; NO-SIMD128-NEXT: i32.const $push82=, 254 +; NO-SIMD128-NEXT: i32.and $push44=, $pop43, $pop82 +; NO-SIMD128-NEXT: i32.const $push81=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push45=, $pop44, $pop81 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop45 +; NO-SIMD128-NEXT: i32.add $push46=, $5, $21 +; NO-SIMD128-NEXT: i32.const $push80=, 1 +; NO-SIMD128-NEXT: i32.add $push47=, $pop46, $pop80 +; NO-SIMD128-NEXT: i32.const $push79=, 254 +; NO-SIMD128-NEXT: i32.and $push48=, $pop47, $pop79 +; NO-SIMD128-NEXT: i32.const $push78=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push49=, $pop48, $pop78 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop49 +; NO-SIMD128-NEXT: i32.add $push50=, $4, $20 +; NO-SIMD128-NEXT: i32.const $push77=, 1 +; NO-SIMD128-NEXT: i32.add $push51=, $pop50, $pop77 +; NO-SIMD128-NEXT: i32.const $push76=, 254 +; NO-SIMD128-NEXT: i32.and $push52=, $pop51, $pop76 +; NO-SIMD128-NEXT: i32.const $push75=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push53=, $pop52, $pop75 +; NO-SIMD128-NEXT: i32.store8 3($0), 
$pop53 +; NO-SIMD128-NEXT: i32.add $push54=, $3, $19 +; NO-SIMD128-NEXT: i32.const $push74=, 1 +; NO-SIMD128-NEXT: i32.add $push55=, $pop54, $pop74 +; NO-SIMD128-NEXT: i32.const $push73=, 254 +; NO-SIMD128-NEXT: i32.and $push56=, $pop55, $pop73 +; NO-SIMD128-NEXT: i32.const $push72=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push57=, $pop56, $pop72 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop57 +; NO-SIMD128-NEXT: i32.add $push58=, $2, $18 +; NO-SIMD128-NEXT: i32.const $push71=, 1 +; NO-SIMD128-NEXT: i32.add $push59=, $pop58, $pop71 +; NO-SIMD128-NEXT: i32.const $push70=, 254 +; NO-SIMD128-NEXT: i32.and $push60=, $pop59, $pop70 +; NO-SIMD128-NEXT: i32.const $push69=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push61=, $pop60, $pop69 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop61 +; NO-SIMD128-NEXT: i32.add $push62=, $1, $17 +; NO-SIMD128-NEXT: i32.const $push68=, 1 +; NO-SIMD128-NEXT: i32.add $push63=, $pop62, $pop68 +; NO-SIMD128-NEXT: i32.const $push67=, 254 +; NO-SIMD128-NEXT: i32.and $push64=, $pop63, $pop67 +; NO-SIMD128-NEXT: i32.const $push66=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push65=, $pop64, $pop66 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop65 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: avgr_u_v16i8: @@ -1771,151 +1441,129 @@ define <16 x i8> @avgr_u_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: i32.add $push2=, $pop0, $pop1 ; NO-SIMD128-FAST-NEXT: i32.const $push3=, 254 ; NO-SIMD128-FAST-NEXT: i32.and $push4=, $pop2, $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push133=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop133 +; NO-SIMD128-FAST-NEXT: i32.const $push111=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop111 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop5 ; NO-SIMD128-FAST-NEXT: i32.add $push6=, $2, $18 -; NO-SIMD128-FAST-NEXT: i32.const $push132=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop132 -; NO-SIMD128-FAST-NEXT: i32.const $push131=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop131 -; 
NO-SIMD128-FAST-NEXT: i32.const $push130=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop130 +; NO-SIMD128-FAST-NEXT: i32.const $push110=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop110 +; NO-SIMD128-FAST-NEXT: i32.const $push109=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop109 +; NO-SIMD128-FAST-NEXT: i32.const $push108=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop108 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop9 ; NO-SIMD128-FAST-NEXT: i32.add $push10=, $3, $19 -; NO-SIMD128-FAST-NEXT: i32.const $push129=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop129 -; NO-SIMD128-FAST-NEXT: i32.const $push128=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop128 -; NO-SIMD128-FAST-NEXT: i32.const $push127=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop127 -; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push14=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-FAST-NEXT: i32.add $push16=, $4, $20 -; NO-SIMD128-FAST-NEXT: i32.const $push126=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push17=, $pop16, $pop126 -; NO-SIMD128-FAST-NEXT: i32.const $push125=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push18=, $pop17, $pop125 -; NO-SIMD128-FAST-NEXT: i32.const $push124=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push19=, $pop18, $pop124 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop15), $pop19 -; NO-SIMD128-FAST-NEXT: i32.add $push20=, $5, $21 -; NO-SIMD128-FAST-NEXT: i32.const $push123=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push21=, $pop20, $pop123 -; NO-SIMD128-FAST-NEXT: i32.const $push122=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $pop122 -; NO-SIMD128-FAST-NEXT: i32.const $push121=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push23=, $pop22, $pop121 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop23 -; NO-SIMD128-FAST-NEXT: i32.const $push24=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-FAST-NEXT: i32.add $push26=, 
$6, $22 -; NO-SIMD128-FAST-NEXT: i32.const $push120=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop120 -; NO-SIMD128-FAST-NEXT: i32.const $push119=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop119 -; NO-SIMD128-FAST-NEXT: i32.const $push118=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop118 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop25), $pop29 -; NO-SIMD128-FAST-NEXT: i32.const $push30=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $7, $23 -; NO-SIMD128-FAST-NEXT: i32.const $push117=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $pop32, $pop117 -; NO-SIMD128-FAST-NEXT: i32.const $push116=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push34=, $pop33, $pop116 -; NO-SIMD128-FAST-NEXT: i32.const $push115=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push35=, $pop34, $pop115 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push36=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-FAST-NEXT: i32.add $push38=, $8, $24 -; NO-SIMD128-FAST-NEXT: i32.const $push114=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push39=, $pop38, $pop114 -; NO-SIMD128-FAST-NEXT: i32.const $push113=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $pop113 -; NO-SIMD128-FAST-NEXT: i32.const $push112=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop112 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop41 -; NO-SIMD128-FAST-NEXT: i32.add $push42=, $9, $25 -; NO-SIMD128-FAST-NEXT: i32.const $push111=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push43=, $pop42, $pop111 -; NO-SIMD128-FAST-NEXT: i32.const $push110=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push44=, $pop43, $pop110 -; NO-SIMD128-FAST-NEXT: i32.const $push109=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push45=, $pop44, $pop109 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop45 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push47=, $0, $pop46 -; 
NO-SIMD128-FAST-NEXT: i32.add $push48=, $10, $26 -; NO-SIMD128-FAST-NEXT: i32.const $push108=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push49=, $pop48, $pop108 -; NO-SIMD128-FAST-NEXT: i32.const $push107=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push50=, $pop49, $pop107 -; NO-SIMD128-FAST-NEXT: i32.const $push106=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push51=, $pop50, $pop106 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop47), $pop51 -; NO-SIMD128-FAST-NEXT: i32.const $push52=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push53=, $0, $pop52 -; NO-SIMD128-FAST-NEXT: i32.add $push54=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.const $push107=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop107 +; NO-SIMD128-FAST-NEXT: i32.const $push106=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop106 ; NO-SIMD128-FAST-NEXT: i32.const $push105=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push55=, $pop54, $pop105 -; NO-SIMD128-FAST-NEXT: i32.const $push104=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push56=, $pop55, $pop104 -; NO-SIMD128-FAST-NEXT: i32.const $push103=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push57=, $pop56, $pop103 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop53), $pop57 -; NO-SIMD128-FAST-NEXT: i32.const $push58=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push59=, $0, $pop58 -; NO-SIMD128-FAST-NEXT: i32.add $push60=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop105 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.const $push104=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $pop14, $pop104 +; NO-SIMD128-FAST-NEXT: i32.const $push103=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $pop15, $pop103 ; NO-SIMD128-FAST-NEXT: i32.const $push102=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push61=, $pop60, $pop102 -; NO-SIMD128-FAST-NEXT: i32.const $push101=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push62=, $pop61, $pop101 -; NO-SIMD128-FAST-NEXT: i32.const $push100=, 1 -; NO-SIMD128-FAST-NEXT: 
i32.shr_u $push63=, $pop62, $pop100 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop59), $pop63 -; NO-SIMD128-FAST-NEXT: i32.const $push64=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push65=, $0, $pop64 -; NO-SIMD128-FAST-NEXT: i32.add $push66=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop102 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.const $push101=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push19=, $pop18, $pop101 +; NO-SIMD128-FAST-NEXT: i32.const $push100=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $pop19, $pop100 ; NO-SIMD128-FAST-NEXT: i32.const $push99=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push67=, $pop66, $pop99 -; NO-SIMD128-FAST-NEXT: i32.const $push98=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push68=, $pop67, $pop98 -; NO-SIMD128-FAST-NEXT: i32.const $push97=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push69=, $pop68, $pop97 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop65), $pop69 -; NO-SIMD128-FAST-NEXT: i32.const $push70=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push71=, $0, $pop70 -; NO-SIMD128-FAST-NEXT: i32.add $push72=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push21=, $pop20, $pop99 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.const $push98=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push23=, $pop22, $pop98 +; NO-SIMD128-FAST-NEXT: i32.const $push97=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push24=, $pop23, $pop97 ; NO-SIMD128-FAST-NEXT: i32.const $push96=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push73=, $pop72, $pop96 -; NO-SIMD128-FAST-NEXT: i32.const $push95=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push74=, $pop73, $pop95 -; NO-SIMD128-FAST-NEXT: i32.const $push94=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push75=, $pop74, $pop94 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop71), $pop75 -; NO-SIMD128-FAST-NEXT: i32.const $push76=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push77=, $0, $pop76 -; 
NO-SIMD128-FAST-NEXT: i32.add $push78=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop96 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop25 +; NO-SIMD128-FAST-NEXT: i32.add $push26=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.const $push95=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop95 +; NO-SIMD128-FAST-NEXT: i32.const $push94=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop94 ; NO-SIMD128-FAST-NEXT: i32.const $push93=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push79=, $pop78, $pop93 -; NO-SIMD128-FAST-NEXT: i32.const $push92=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push80=, $pop79, $pop92 -; NO-SIMD128-FAST-NEXT: i32.const $push91=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push81=, $pop80, $pop91 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop77), $pop81 -; NO-SIMD128-FAST-NEXT: i32.const $push82=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push83=, $0, $pop82 -; NO-SIMD128-FAST-NEXT: i32.add $push84=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop93 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop29 +; NO-SIMD128-FAST-NEXT: i32.add $push30=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.const $push92=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push31=, $pop30, $pop92 +; NO-SIMD128-FAST-NEXT: i32.const $push91=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push32=, $pop31, $pop91 ; NO-SIMD128-FAST-NEXT: i32.const $push90=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push85=, $pop84, $pop90 -; NO-SIMD128-FAST-NEXT: i32.const $push89=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push86=, $pop85, $pop89 -; NO-SIMD128-FAST-NEXT: i32.const $push88=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push87=, $pop86, $pop88 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop83), $pop87 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push33=, $pop32, $pop90 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop33 +; NO-SIMD128-FAST-NEXT: i32.add $push34=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.const $push89=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push35=, $pop34, $pop89 +; NO-SIMD128-FAST-NEXT: i32.const 
$push88=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push36=, $pop35, $pop88 +; NO-SIMD128-FAST-NEXT: i32.const $push87=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push37=, $pop36, $pop87 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop37 +; NO-SIMD128-FAST-NEXT: i32.add $push38=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.const $push86=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push39=, $pop38, $pop86 +; NO-SIMD128-FAST-NEXT: i32.const $push85=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $pop85 +; NO-SIMD128-FAST-NEXT: i32.const $push84=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop84 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop41 +; NO-SIMD128-FAST-NEXT: i32.add $push42=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.const $push83=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push43=, $pop42, $pop83 +; NO-SIMD128-FAST-NEXT: i32.const $push82=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push44=, $pop43, $pop82 +; NO-SIMD128-FAST-NEXT: i32.const $push81=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push45=, $pop44, $pop81 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop45 +; NO-SIMD128-FAST-NEXT: i32.add $push46=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.const $push80=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push47=, $pop46, $pop80 +; NO-SIMD128-FAST-NEXT: i32.const $push79=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push48=, $pop47, $pop79 +; NO-SIMD128-FAST-NEXT: i32.const $push78=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push49=, $pop48, $pop78 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop49 +; NO-SIMD128-FAST-NEXT: i32.add $push50=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.const $push77=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push51=, $pop50, $pop77 +; NO-SIMD128-FAST-NEXT: i32.const $push76=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push52=, $pop51, $pop76 +; NO-SIMD128-FAST-NEXT: i32.const $push75=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push53=, $pop52, $pop75 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop53 +; NO-SIMD128-FAST-NEXT: i32.add $push54=, $14, $30 +; NO-SIMD128-FAST-NEXT: 
i32.const $push74=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push55=, $pop54, $pop74 +; NO-SIMD128-FAST-NEXT: i32.const $push73=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push56=, $pop55, $pop73 +; NO-SIMD128-FAST-NEXT: i32.const $push72=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push57=, $pop56, $pop72 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop57 +; NO-SIMD128-FAST-NEXT: i32.add $push58=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.const $push71=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push59=, $pop58, $pop71 +; NO-SIMD128-FAST-NEXT: i32.const $push70=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push60=, $pop59, $pop70 +; NO-SIMD128-FAST-NEXT: i32.const $push69=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push61=, $pop60, $pop69 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop61 +; NO-SIMD128-FAST-NEXT: i32.add $push62=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.const $push68=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push63=, $pop62, $pop68 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push64=, $pop63, $pop67 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push65=, $pop64, $pop66 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop65 ; NO-SIMD128-FAST-NEXT: return %a = add nuw <16 x i8> %x, %y %b = add nuw <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, @@ -1949,156 +1597,134 @@ define <16 x i8> @avgr_u_v16i8_wrap(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: avgr_u_v16i8_wrap: ; NO-SIMD128: .functype avgr_u_v16i8_wrap (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push0=, 15 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.add $push2=, $16, $32 -; NO-SIMD128-NEXT: i32.const $push3=, 1 -; NO-SIMD128-NEXT: i32.add $push4=, $pop2, $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 254 -; NO-SIMD128-NEXT: i32.and 
$push6=, $pop4, $pop5 -; NO-SIMD128-NEXT: i32.const $push133=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push7=, $pop6, $pop133 -; NO-SIMD128-NEXT: i32.store8 0($pop1), $pop7 -; NO-SIMD128-NEXT: i32.const $push8=, 14 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.add $push10=, $15, $31 -; NO-SIMD128-NEXT: i32.const $push132=, 1 -; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop132 -; NO-SIMD128-NEXT: i32.const $push131=, 254 -; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop131 -; NO-SIMD128-NEXT: i32.const $push130=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop130 -; NO-SIMD128-NEXT: i32.store8 0($pop9), $pop13 -; NO-SIMD128-NEXT: i32.const $push14=, 13 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.add $push16=, $14, $30 -; NO-SIMD128-NEXT: i32.const $push129=, 1 -; NO-SIMD128-NEXT: i32.add $push17=, $pop16, $pop129 -; NO-SIMD128-NEXT: i32.const $push128=, 254 -; NO-SIMD128-NEXT: i32.and $push18=, $pop17, $pop128 -; NO-SIMD128-NEXT: i32.const $push127=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push19=, $pop18, $pop127 -; NO-SIMD128-NEXT: i32.store8 0($pop15), $pop19 -; NO-SIMD128-NEXT: i32.const $push20=, 12 -; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-NEXT: i32.add $push22=, $13, $29 -; NO-SIMD128-NEXT: i32.const $push126=, 1 -; NO-SIMD128-NEXT: i32.add $push23=, $pop22, $pop126 -; NO-SIMD128-NEXT: i32.const $push125=, 254 -; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $pop125 -; NO-SIMD128-NEXT: i32.const $push124=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop124 -; NO-SIMD128-NEXT: i32.store8 0($pop21), $pop25 -; NO-SIMD128-NEXT: i32.const $push26=, 11 -; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-NEXT: i32.add $push28=, $12, $28 -; NO-SIMD128-NEXT: i32.const $push123=, 1 -; NO-SIMD128-NEXT: i32.add $push29=, $pop28, $pop123 -; NO-SIMD128-NEXT: i32.const $push122=, 254 -; NO-SIMD128-NEXT: i32.and $push30=, $pop29, $pop122 -; NO-SIMD128-NEXT: i32.const $push121=, 1 -; 
NO-SIMD128-NEXT: i32.shr_u $push31=, $pop30, $pop121 -; NO-SIMD128-NEXT: i32.store8 0($pop27), $pop31 -; NO-SIMD128-NEXT: i32.const $push32=, 10 -; NO-SIMD128-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-NEXT: i32.add $push34=, $11, $27 -; NO-SIMD128-NEXT: i32.const $push120=, 1 -; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop120 -; NO-SIMD128-NEXT: i32.const $push119=, 254 -; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $pop119 -; NO-SIMD128-NEXT: i32.const $push118=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push37=, $pop36, $pop118 -; NO-SIMD128-NEXT: i32.store8 0($pop33), $pop37 -; NO-SIMD128-NEXT: i32.const $push38=, 9 -; NO-SIMD128-NEXT: i32.add $push39=, $0, $pop38 -; NO-SIMD128-NEXT: i32.add $push40=, $10, $26 -; NO-SIMD128-NEXT: i32.const $push117=, 1 -; NO-SIMD128-NEXT: i32.add $push41=, $pop40, $pop117 -; NO-SIMD128-NEXT: i32.const $push116=, 254 -; NO-SIMD128-NEXT: i32.and $push42=, $pop41, $pop116 -; NO-SIMD128-NEXT: i32.const $push115=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push43=, $pop42, $pop115 -; NO-SIMD128-NEXT: i32.store8 0($pop39), $pop43 -; NO-SIMD128-NEXT: i32.add $push44=, $9, $25 -; NO-SIMD128-NEXT: i32.const $push114=, 1 -; NO-SIMD128-NEXT: i32.add $push45=, $pop44, $pop114 -; NO-SIMD128-NEXT: i32.const $push113=, 254 -; NO-SIMD128-NEXT: i32.and $push46=, $pop45, $pop113 -; NO-SIMD128-NEXT: i32.const $push112=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push47=, $pop46, $pop112 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop47 -; NO-SIMD128-NEXT: i32.const $push48=, 7 -; NO-SIMD128-NEXT: i32.add $push49=, $0, $pop48 -; NO-SIMD128-NEXT: i32.add $push50=, $8, $24 +; NO-SIMD128-NEXT: i32.add $push0=, $16, $32 +; NO-SIMD128-NEXT: i32.const $push1=, 1 +; NO-SIMD128-NEXT: i32.add $push2=, $pop0, $pop1 +; NO-SIMD128-NEXT: i32.const $push3=, 254 +; NO-SIMD128-NEXT: i32.and $push4=, $pop2, $pop3 ; NO-SIMD128-NEXT: i32.const $push111=, 1 -; NO-SIMD128-NEXT: i32.add $push51=, $pop50, $pop111 -; NO-SIMD128-NEXT: i32.const $push110=, 254 -; NO-SIMD128-NEXT: i32.and $push52=, 
$pop51, $pop110 -; NO-SIMD128-NEXT: i32.const $push109=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push53=, $pop52, $pop109 -; NO-SIMD128-NEXT: i32.store8 0($pop49), $pop53 -; NO-SIMD128-NEXT: i32.const $push54=, 6 -; NO-SIMD128-NEXT: i32.add $push55=, $0, $pop54 -; NO-SIMD128-NEXT: i32.add $push56=, $7, $23 +; NO-SIMD128-NEXT: i32.shr_u $push5=, $pop4, $pop111 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop5 +; NO-SIMD128-NEXT: i32.add $push6=, $15, $31 +; NO-SIMD128-NEXT: i32.const $push110=, 1 +; NO-SIMD128-NEXT: i32.add $push7=, $pop6, $pop110 +; NO-SIMD128-NEXT: i32.const $push109=, 254 +; NO-SIMD128-NEXT: i32.and $push8=, $pop7, $pop109 ; NO-SIMD128-NEXT: i32.const $push108=, 1 -; NO-SIMD128-NEXT: i32.add $push57=, $pop56, $pop108 -; NO-SIMD128-NEXT: i32.const $push107=, 254 -; NO-SIMD128-NEXT: i32.and $push58=, $pop57, $pop107 -; NO-SIMD128-NEXT: i32.const $push106=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push59=, $pop58, $pop106 -; NO-SIMD128-NEXT: i32.store8 0($pop55), $pop59 -; NO-SIMD128-NEXT: i32.const $push60=, 5 -; NO-SIMD128-NEXT: i32.add $push61=, $0, $pop60 -; NO-SIMD128-NEXT: i32.add $push62=, $6, $22 +; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop108 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop9 +; NO-SIMD128-NEXT: i32.add $push10=, $14, $30 +; NO-SIMD128-NEXT: i32.const $push107=, 1 +; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop107 +; NO-SIMD128-NEXT: i32.const $push106=, 254 +; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop106 ; NO-SIMD128-NEXT: i32.const $push105=, 1 -; NO-SIMD128-NEXT: i32.add $push63=, $pop62, $pop105 -; NO-SIMD128-NEXT: i32.const $push104=, 254 -; NO-SIMD128-NEXT: i32.and $push64=, $pop63, $pop104 -; NO-SIMD128-NEXT: i32.const $push103=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push65=, $pop64, $pop103 -; NO-SIMD128-NEXT: i32.store8 0($pop61), $pop65 -; NO-SIMD128-NEXT: i32.add $push66=, $5, $21 +; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop105 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop13 +; NO-SIMD128-NEXT: i32.add $push14=, $13, $29 
+; NO-SIMD128-NEXT: i32.const $push104=, 1 +; NO-SIMD128-NEXT: i32.add $push15=, $pop14, $pop104 +; NO-SIMD128-NEXT: i32.const $push103=, 254 +; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $pop103 ; NO-SIMD128-NEXT: i32.const $push102=, 1 -; NO-SIMD128-NEXT: i32.add $push67=, $pop66, $pop102 -; NO-SIMD128-NEXT: i32.const $push101=, 254 -; NO-SIMD128-NEXT: i32.and $push68=, $pop67, $pop101 -; NO-SIMD128-NEXT: i32.const $push100=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push69=, $pop68, $pop100 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop69 -; NO-SIMD128-NEXT: i32.const $push70=, 3 -; NO-SIMD128-NEXT: i32.add $push71=, $0, $pop70 -; NO-SIMD128-NEXT: i32.add $push72=, $4, $20 +; NO-SIMD128-NEXT: i32.shr_u $push17=, $pop16, $pop102 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop17 +; NO-SIMD128-NEXT: i32.add $push18=, $12, $28 +; NO-SIMD128-NEXT: i32.const $push101=, 1 +; NO-SIMD128-NEXT: i32.add $push19=, $pop18, $pop101 +; NO-SIMD128-NEXT: i32.const $push100=, 254 +; NO-SIMD128-NEXT: i32.and $push20=, $pop19, $pop100 ; NO-SIMD128-NEXT: i32.const $push99=, 1 -; NO-SIMD128-NEXT: i32.add $push73=, $pop72, $pop99 -; NO-SIMD128-NEXT: i32.const $push98=, 254 -; NO-SIMD128-NEXT: i32.and $push74=, $pop73, $pop98 -; NO-SIMD128-NEXT: i32.const $push97=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push75=, $pop74, $pop97 -; NO-SIMD128-NEXT: i32.store8 0($pop71), $pop75 -; NO-SIMD128-NEXT: i32.add $push76=, $3, $19 +; NO-SIMD128-NEXT: i32.shr_u $push21=, $pop20, $pop99 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop21 +; NO-SIMD128-NEXT: i32.add $push22=, $11, $27 +; NO-SIMD128-NEXT: i32.const $push98=, 1 +; NO-SIMD128-NEXT: i32.add $push23=, $pop22, $pop98 +; NO-SIMD128-NEXT: i32.const $push97=, 254 +; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $pop97 ; NO-SIMD128-NEXT: i32.const $push96=, 1 -; NO-SIMD128-NEXT: i32.add $push77=, $pop76, $pop96 -; NO-SIMD128-NEXT: i32.const $push95=, 254 -; NO-SIMD128-NEXT: i32.and $push78=, $pop77, $pop95 -; NO-SIMD128-NEXT: i32.const $push94=, 1 -; NO-SIMD128-NEXT: 
i32.shr_u $push79=, $pop78, $pop94 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop79 -; NO-SIMD128-NEXT: i32.add $push80=, $2, $18 +; NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop96 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop25 +; NO-SIMD128-NEXT: i32.add $push26=, $10, $26 +; NO-SIMD128-NEXT: i32.const $push95=, 1 +; NO-SIMD128-NEXT: i32.add $push27=, $pop26, $pop95 +; NO-SIMD128-NEXT: i32.const $push94=, 254 +; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $pop94 ; NO-SIMD128-NEXT: i32.const $push93=, 1 -; NO-SIMD128-NEXT: i32.add $push81=, $pop80, $pop93 -; NO-SIMD128-NEXT: i32.const $push92=, 254 -; NO-SIMD128-NEXT: i32.and $push82=, $pop81, $pop92 -; NO-SIMD128-NEXT: i32.const $push91=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push83=, $pop82, $pop91 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop83 -; NO-SIMD128-NEXT: i32.add $push84=, $1, $17 +; NO-SIMD128-NEXT: i32.shr_u $push29=, $pop28, $pop93 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop29 +; NO-SIMD128-NEXT: i32.add $push30=, $9, $25 +; NO-SIMD128-NEXT: i32.const $push92=, 1 +; NO-SIMD128-NEXT: i32.add $push31=, $pop30, $pop92 +; NO-SIMD128-NEXT: i32.const $push91=, 254 +; NO-SIMD128-NEXT: i32.and $push32=, $pop31, $pop91 ; NO-SIMD128-NEXT: i32.const $push90=, 1 -; NO-SIMD128-NEXT: i32.add $push85=, $pop84, $pop90 -; NO-SIMD128-NEXT: i32.const $push89=, 254 -; NO-SIMD128-NEXT: i32.and $push86=, $pop85, $pop89 -; NO-SIMD128-NEXT: i32.const $push88=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push87=, $pop86, $pop88 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop87 +; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop90 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop33 +; NO-SIMD128-NEXT: i32.add $push34=, $8, $24 +; NO-SIMD128-NEXT: i32.const $push89=, 1 +; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop89 +; NO-SIMD128-NEXT: i32.const $push88=, 254 +; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $pop88 +; NO-SIMD128-NEXT: i32.const $push87=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push37=, $pop36, $pop87 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop37 +; 
NO-SIMD128-NEXT: i32.add $push38=, $7, $23 +; NO-SIMD128-NEXT: i32.const $push86=, 1 +; NO-SIMD128-NEXT: i32.add $push39=, $pop38, $pop86 +; NO-SIMD128-NEXT: i32.const $push85=, 254 +; NO-SIMD128-NEXT: i32.and $push40=, $pop39, $pop85 +; NO-SIMD128-NEXT: i32.const $push84=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push41=, $pop40, $pop84 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop41 +; NO-SIMD128-NEXT: i32.add $push42=, $6, $22 +; NO-SIMD128-NEXT: i32.const $push83=, 1 +; NO-SIMD128-NEXT: i32.add $push43=, $pop42, $pop83 +; NO-SIMD128-NEXT: i32.const $push82=, 254 +; NO-SIMD128-NEXT: i32.and $push44=, $pop43, $pop82 +; NO-SIMD128-NEXT: i32.const $push81=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push45=, $pop44, $pop81 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop45 +; NO-SIMD128-NEXT: i32.add $push46=, $5, $21 +; NO-SIMD128-NEXT: i32.const $push80=, 1 +; NO-SIMD128-NEXT: i32.add $push47=, $pop46, $pop80 +; NO-SIMD128-NEXT: i32.const $push79=, 254 +; NO-SIMD128-NEXT: i32.and $push48=, $pop47, $pop79 +; NO-SIMD128-NEXT: i32.const $push78=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push49=, $pop48, $pop78 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop49 +; NO-SIMD128-NEXT: i32.add $push50=, $4, $20 +; NO-SIMD128-NEXT: i32.const $push77=, 1 +; NO-SIMD128-NEXT: i32.add $push51=, $pop50, $pop77 +; NO-SIMD128-NEXT: i32.const $push76=, 254 +; NO-SIMD128-NEXT: i32.and $push52=, $pop51, $pop76 +; NO-SIMD128-NEXT: i32.const $push75=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push53=, $pop52, $pop75 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop53 +; NO-SIMD128-NEXT: i32.add $push54=, $3, $19 +; NO-SIMD128-NEXT: i32.const $push74=, 1 +; NO-SIMD128-NEXT: i32.add $push55=, $pop54, $pop74 +; NO-SIMD128-NEXT: i32.const $push73=, 254 +; NO-SIMD128-NEXT: i32.and $push56=, $pop55, $pop73 +; NO-SIMD128-NEXT: i32.const $push72=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push57=, $pop56, $pop72 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop57 +; NO-SIMD128-NEXT: i32.add $push58=, $2, $18 +; NO-SIMD128-NEXT: i32.const $push71=, 1 +; 
NO-SIMD128-NEXT: i32.add $push59=, $pop58, $pop71 +; NO-SIMD128-NEXT: i32.const $push70=, 254 +; NO-SIMD128-NEXT: i32.and $push60=, $pop59, $pop70 +; NO-SIMD128-NEXT: i32.const $push69=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push61=, $pop60, $pop69 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop61 +; NO-SIMD128-NEXT: i32.add $push62=, $1, $17 +; NO-SIMD128-NEXT: i32.const $push68=, 1 +; NO-SIMD128-NEXT: i32.add $push63=, $pop62, $pop68 +; NO-SIMD128-NEXT: i32.const $push67=, 254 +; NO-SIMD128-NEXT: i32.and $push64=, $pop63, $pop67 +; NO-SIMD128-NEXT: i32.const $push66=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push65=, $pop64, $pop66 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop65 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: avgr_u_v16i8_wrap: @@ -2109,151 +1735,129 @@ define <16 x i8> @avgr_u_v16i8_wrap(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: i32.add $push2=, $pop0, $pop1 ; NO-SIMD128-FAST-NEXT: i32.const $push3=, 254 ; NO-SIMD128-FAST-NEXT: i32.and $push4=, $pop2, $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push133=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop133 +; NO-SIMD128-FAST-NEXT: i32.const $push111=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop111 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop5 ; NO-SIMD128-FAST-NEXT: i32.add $push6=, $2, $18 -; NO-SIMD128-FAST-NEXT: i32.const $push132=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop132 -; NO-SIMD128-FAST-NEXT: i32.const $push131=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop131 -; NO-SIMD128-FAST-NEXT: i32.const $push130=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop130 +; NO-SIMD128-FAST-NEXT: i32.const $push110=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop110 +; NO-SIMD128-FAST-NEXT: i32.const $push109=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop109 +; NO-SIMD128-FAST-NEXT: i32.const $push108=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop108 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop9 ; 
NO-SIMD128-FAST-NEXT: i32.add $push10=, $3, $19 -; NO-SIMD128-FAST-NEXT: i32.const $push129=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop129 -; NO-SIMD128-FAST-NEXT: i32.const $push128=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop128 -; NO-SIMD128-FAST-NEXT: i32.const $push127=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop127 -; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push14=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-FAST-NEXT: i32.add $push16=, $4, $20 -; NO-SIMD128-FAST-NEXT: i32.const $push126=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push17=, $pop16, $pop126 -; NO-SIMD128-FAST-NEXT: i32.const $push125=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push18=, $pop17, $pop125 -; NO-SIMD128-FAST-NEXT: i32.const $push124=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push19=, $pop18, $pop124 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop15), $pop19 -; NO-SIMD128-FAST-NEXT: i32.add $push20=, $5, $21 -; NO-SIMD128-FAST-NEXT: i32.const $push123=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push21=, $pop20, $pop123 -; NO-SIMD128-FAST-NEXT: i32.const $push122=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $pop122 -; NO-SIMD128-FAST-NEXT: i32.const $push121=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push23=, $pop22, $pop121 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop23 -; NO-SIMD128-FAST-NEXT: i32.const $push24=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-FAST-NEXT: i32.add $push26=, $6, $22 -; NO-SIMD128-FAST-NEXT: i32.const $push120=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop120 -; NO-SIMD128-FAST-NEXT: i32.const $push119=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop119 -; NO-SIMD128-FAST-NEXT: i32.const $push118=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop118 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop25), $pop29 -; NO-SIMD128-FAST-NEXT: i32.const $push30=, 6 -; NO-SIMD128-FAST-NEXT: i32.add 
$push31=, $0, $pop30 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $7, $23 -; NO-SIMD128-FAST-NEXT: i32.const $push117=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $pop32, $pop117 -; NO-SIMD128-FAST-NEXT: i32.const $push116=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push34=, $pop33, $pop116 -; NO-SIMD128-FAST-NEXT: i32.const $push115=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push35=, $pop34, $pop115 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push36=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-FAST-NEXT: i32.add $push38=, $8, $24 -; NO-SIMD128-FAST-NEXT: i32.const $push114=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push39=, $pop38, $pop114 -; NO-SIMD128-FAST-NEXT: i32.const $push113=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $pop113 -; NO-SIMD128-FAST-NEXT: i32.const $push112=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop112 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop41 -; NO-SIMD128-FAST-NEXT: i32.add $push42=, $9, $25 -; NO-SIMD128-FAST-NEXT: i32.const $push111=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push43=, $pop42, $pop111 -; NO-SIMD128-FAST-NEXT: i32.const $push110=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push44=, $pop43, $pop110 -; NO-SIMD128-FAST-NEXT: i32.const $push109=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push45=, $pop44, $pop109 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop45 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push47=, $0, $pop46 -; NO-SIMD128-FAST-NEXT: i32.add $push48=, $10, $26 -; NO-SIMD128-FAST-NEXT: i32.const $push108=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push49=, $pop48, $pop108 -; NO-SIMD128-FAST-NEXT: i32.const $push107=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push50=, $pop49, $pop107 -; NO-SIMD128-FAST-NEXT: i32.const $push106=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push51=, $pop50, $pop106 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop47), $pop51 -; NO-SIMD128-FAST-NEXT: i32.const $push52=, 10 -; 
NO-SIMD128-FAST-NEXT: i32.add $push53=, $0, $pop52 -; NO-SIMD128-FAST-NEXT: i32.add $push54=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.const $push107=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop107 +; NO-SIMD128-FAST-NEXT: i32.const $push106=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop106 ; NO-SIMD128-FAST-NEXT: i32.const $push105=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push55=, $pop54, $pop105 -; NO-SIMD128-FAST-NEXT: i32.const $push104=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push56=, $pop55, $pop104 -; NO-SIMD128-FAST-NEXT: i32.const $push103=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push57=, $pop56, $pop103 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop53), $pop57 -; NO-SIMD128-FAST-NEXT: i32.const $push58=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push59=, $0, $pop58 -; NO-SIMD128-FAST-NEXT: i32.add $push60=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop105 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.const $push104=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $pop14, $pop104 +; NO-SIMD128-FAST-NEXT: i32.const $push103=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $pop15, $pop103 ; NO-SIMD128-FAST-NEXT: i32.const $push102=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push61=, $pop60, $pop102 -; NO-SIMD128-FAST-NEXT: i32.const $push101=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push62=, $pop61, $pop101 -; NO-SIMD128-FAST-NEXT: i32.const $push100=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push63=, $pop62, $pop100 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop59), $pop63 -; NO-SIMD128-FAST-NEXT: i32.const $push64=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push65=, $0, $pop64 -; NO-SIMD128-FAST-NEXT: i32.add $push66=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop102 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.const $push101=, 1 +; NO-SIMD128-FAST-NEXT: 
i32.add $push19=, $pop18, $pop101 +; NO-SIMD128-FAST-NEXT: i32.const $push100=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $pop19, $pop100 ; NO-SIMD128-FAST-NEXT: i32.const $push99=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push67=, $pop66, $pop99 -; NO-SIMD128-FAST-NEXT: i32.const $push98=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push68=, $pop67, $pop98 -; NO-SIMD128-FAST-NEXT: i32.const $push97=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push69=, $pop68, $pop97 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop65), $pop69 -; NO-SIMD128-FAST-NEXT: i32.const $push70=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push71=, $0, $pop70 -; NO-SIMD128-FAST-NEXT: i32.add $push72=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push21=, $pop20, $pop99 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.const $push98=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push23=, $pop22, $pop98 +; NO-SIMD128-FAST-NEXT: i32.const $push97=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push24=, $pop23, $pop97 ; NO-SIMD128-FAST-NEXT: i32.const $push96=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push73=, $pop72, $pop96 -; NO-SIMD128-FAST-NEXT: i32.const $push95=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push74=, $pop73, $pop95 -; NO-SIMD128-FAST-NEXT: i32.const $push94=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push75=, $pop74, $pop94 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop71), $pop75 -; NO-SIMD128-FAST-NEXT: i32.const $push76=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push77=, $0, $pop76 -; NO-SIMD128-FAST-NEXT: i32.add $push78=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop96 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop25 +; NO-SIMD128-FAST-NEXT: i32.add $push26=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.const $push95=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop95 +; NO-SIMD128-FAST-NEXT: i32.const $push94=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop94 ; NO-SIMD128-FAST-NEXT: i32.const $push93=, 1 -; 
NO-SIMD128-FAST-NEXT: i32.add $push79=, $pop78, $pop93 -; NO-SIMD128-FAST-NEXT: i32.const $push92=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push80=, $pop79, $pop92 -; NO-SIMD128-FAST-NEXT: i32.const $push91=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push81=, $pop80, $pop91 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop77), $pop81 -; NO-SIMD128-FAST-NEXT: i32.const $push82=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push83=, $0, $pop82 -; NO-SIMD128-FAST-NEXT: i32.add $push84=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop93 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop29 +; NO-SIMD128-FAST-NEXT: i32.add $push30=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.const $push92=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push31=, $pop30, $pop92 +; NO-SIMD128-FAST-NEXT: i32.const $push91=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push32=, $pop31, $pop91 ; NO-SIMD128-FAST-NEXT: i32.const $push90=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push85=, $pop84, $pop90 -; NO-SIMD128-FAST-NEXT: i32.const $push89=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push86=, $pop85, $pop89 -; NO-SIMD128-FAST-NEXT: i32.const $push88=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push87=, $pop86, $pop88 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop83), $pop87 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push33=, $pop32, $pop90 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop33 +; NO-SIMD128-FAST-NEXT: i32.add $push34=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.const $push89=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push35=, $pop34, $pop89 +; NO-SIMD128-FAST-NEXT: i32.const $push88=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push36=, $pop35, $pop88 +; NO-SIMD128-FAST-NEXT: i32.const $push87=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push37=, $pop36, $pop87 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop37 +; NO-SIMD128-FAST-NEXT: i32.add $push38=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.const $push86=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push39=, $pop38, $pop86 +; NO-SIMD128-FAST-NEXT: i32.const $push85=, 254 +; NO-SIMD128-FAST-NEXT: i32.and 
$push40=, $pop39, $pop85 +; NO-SIMD128-FAST-NEXT: i32.const $push84=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop84 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop41 +; NO-SIMD128-FAST-NEXT: i32.add $push42=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.const $push83=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push43=, $pop42, $pop83 +; NO-SIMD128-FAST-NEXT: i32.const $push82=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push44=, $pop43, $pop82 +; NO-SIMD128-FAST-NEXT: i32.const $push81=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push45=, $pop44, $pop81 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop45 +; NO-SIMD128-FAST-NEXT: i32.add $push46=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.const $push80=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push47=, $pop46, $pop80 +; NO-SIMD128-FAST-NEXT: i32.const $push79=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push48=, $pop47, $pop79 +; NO-SIMD128-FAST-NEXT: i32.const $push78=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push49=, $pop48, $pop78 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop49 +; NO-SIMD128-FAST-NEXT: i32.add $push50=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.const $push77=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push51=, $pop50, $pop77 +; NO-SIMD128-FAST-NEXT: i32.const $push76=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push52=, $pop51, $pop76 +; NO-SIMD128-FAST-NEXT: i32.const $push75=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push53=, $pop52, $pop75 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop53 +; NO-SIMD128-FAST-NEXT: i32.add $push54=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.const $push74=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push55=, $pop54, $pop74 +; NO-SIMD128-FAST-NEXT: i32.const $push73=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push56=, $pop55, $pop73 +; NO-SIMD128-FAST-NEXT: i32.const $push72=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push57=, $pop56, $pop72 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop57 +; NO-SIMD128-FAST-NEXT: i32.add $push58=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.const $push71=, 1 +; NO-SIMD128-FAST-NEXT: 
i32.add $push59=, $pop58, $pop71 +; NO-SIMD128-FAST-NEXT: i32.const $push70=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push60=, $pop59, $pop70 +; NO-SIMD128-FAST-NEXT: i32.const $push69=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push61=, $pop60, $pop69 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop61 +; NO-SIMD128-FAST-NEXT: i32.add $push62=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.const $push68=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push63=, $pop62, $pop68 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push64=, $pop63, $pop67 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push65=, $pop64, $pop66 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop65 ; NO-SIMD128-FAST-NEXT: return %a = add <16 x i8> %x, %y %b = add <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, @@ -2279,140 +1883,118 @@ define <16 x i8> @abs_v16i8(<16 x i8> %x) { ; NO-SIMD128-LABEL: abs_v16i8: ; NO-SIMD128: .functype abs_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push4=, 15 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 ; NO-SIMD128-NEXT: i32.extend8_s $push0=, $16 ; NO-SIMD128-NEXT: i32.const $push1=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push117=, $pop0, $pop1 -; NO-SIMD128-NEXT: local.tee $push116=, $17=, $pop117 -; NO-SIMD128-NEXT: i32.xor $push2=, $16, $pop116 +; NO-SIMD128-NEXT: i32.shr_s $push95=, $pop0, $pop1 +; NO-SIMD128-NEXT: local.tee $push94=, $17=, $pop95 +; NO-SIMD128-NEXT: i32.xor $push2=, $16, $pop94 ; NO-SIMD128-NEXT: i32.sub $push3=, $pop2, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop5), $pop3 -; NO-SIMD128-NEXT: i32.const $push9=, 14 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.extend8_s $push6=, $15 -; NO-SIMD128-NEXT: i32.const $push115=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push114=, $pop6, $pop115 -; NO-SIMD128-NEXT: local.tee $push113=, $16=, $pop114 -; NO-SIMD128-NEXT: 
i32.xor $push7=, $15, $pop113 -; NO-SIMD128-NEXT: i32.sub $push8=, $pop7, $16 -; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8 -; NO-SIMD128-NEXT: i32.const $push14=, 13 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.extend8_s $push11=, $14 -; NO-SIMD128-NEXT: i32.const $push112=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push111=, $pop11, $pop112 -; NO-SIMD128-NEXT: local.tee $push110=, $16=, $pop111 -; NO-SIMD128-NEXT: i32.xor $push12=, $14, $pop110 -; NO-SIMD128-NEXT: i32.sub $push13=, $pop12, $16 -; NO-SIMD128-NEXT: i32.store8 0($pop15), $pop13 -; NO-SIMD128-NEXT: i32.const $push19=, 12 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.extend8_s $push16=, $13 -; NO-SIMD128-NEXT: i32.const $push109=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push108=, $pop16, $pop109 -; NO-SIMD128-NEXT: local.tee $push107=, $16=, $pop108 -; NO-SIMD128-NEXT: i32.xor $push17=, $13, $pop107 -; NO-SIMD128-NEXT: i32.sub $push18=, $pop17, $16 -; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push24=, 11 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.extend8_s $push21=, $12 -; NO-SIMD128-NEXT: i32.const $push106=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push105=, $pop21, $pop106 -; NO-SIMD128-NEXT: local.tee $push104=, $16=, $pop105 -; NO-SIMD128-NEXT: i32.xor $push22=, $12, $pop104 -; NO-SIMD128-NEXT: i32.sub $push23=, $pop22, $16 -; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 -; NO-SIMD128-NEXT: i32.const $push29=, 10 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.extend8_s $push26=, $11 -; NO-SIMD128-NEXT: i32.const $push103=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push102=, $pop26, $pop103 -; NO-SIMD128-NEXT: local.tee $push101=, $16=, $pop102 -; NO-SIMD128-NEXT: i32.xor $push27=, $11, $pop101 -; NO-SIMD128-NEXT: i32.sub $push28=, $pop27, $16 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push34=, 9 -; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 -; 
NO-SIMD128-NEXT: i32.extend8_s $push31=, $10 -; NO-SIMD128-NEXT: i32.const $push100=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push99=, $pop31, $pop100 -; NO-SIMD128-NEXT: local.tee $push98=, $16=, $pop99 -; NO-SIMD128-NEXT: i32.xor $push32=, $10, $pop98 -; NO-SIMD128-NEXT: i32.sub $push33=, $pop32, $16 -; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 -; NO-SIMD128-NEXT: i32.extend8_s $push36=, $9 -; NO-SIMD128-NEXT: i32.const $push97=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push96=, $pop36, $pop97 -; NO-SIMD128-NEXT: local.tee $push95=, $16=, $pop96 -; NO-SIMD128-NEXT: i32.xor $push37=, $9, $pop95 -; NO-SIMD128-NEXT: i32.sub $push38=, $pop37, $16 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop38 -; NO-SIMD128-NEXT: i32.const $push94=, 7 -; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop94 -; NO-SIMD128-NEXT: i32.extend8_s $push39=, $8 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop3 +; NO-SIMD128-NEXT: i32.extend8_s $push4=, $15 ; NO-SIMD128-NEXT: i32.const $push93=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push92=, $pop39, $pop93 +; NO-SIMD128-NEXT: i32.shr_s $push92=, $pop4, $pop93 ; NO-SIMD128-NEXT: local.tee $push91=, $16=, $pop92 -; NO-SIMD128-NEXT: i32.xor $push40=, $8, $pop91 -; NO-SIMD128-NEXT: i32.sub $push41=, $pop40, $16 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop41 -; NO-SIMD128-NEXT: i32.const $push46=, 6 -; NO-SIMD128-NEXT: i32.add $push47=, $0, $pop46 -; NO-SIMD128-NEXT: i32.extend8_s $push43=, $7 +; NO-SIMD128-NEXT: i32.xor $push5=, $15, $pop91 +; NO-SIMD128-NEXT: i32.sub $push6=, $pop5, $16 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop6 +; NO-SIMD128-NEXT: i32.extend8_s $push7=, $14 ; NO-SIMD128-NEXT: i32.const $push90=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push89=, $pop43, $pop90 +; NO-SIMD128-NEXT: i32.shr_s $push89=, $pop7, $pop90 ; NO-SIMD128-NEXT: local.tee $push88=, $16=, $pop89 -; NO-SIMD128-NEXT: i32.xor $push44=, $7, $pop88 -; NO-SIMD128-NEXT: i32.sub $push45=, $pop44, $16 -; NO-SIMD128-NEXT: i32.store8 0($pop47), $pop45 -; NO-SIMD128-NEXT: i32.const $push51=, 5 -; 
NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51 -; NO-SIMD128-NEXT: i32.extend8_s $push48=, $6 +; NO-SIMD128-NEXT: i32.xor $push8=, $14, $pop88 +; NO-SIMD128-NEXT: i32.sub $push9=, $pop8, $16 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop9 +; NO-SIMD128-NEXT: i32.extend8_s $push10=, $13 ; NO-SIMD128-NEXT: i32.const $push87=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push86=, $pop48, $pop87 +; NO-SIMD128-NEXT: i32.shr_s $push86=, $pop10, $pop87 ; NO-SIMD128-NEXT: local.tee $push85=, $16=, $pop86 -; NO-SIMD128-NEXT: i32.xor $push49=, $6, $pop85 -; NO-SIMD128-NEXT: i32.sub $push50=, $pop49, $16 -; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50 -; NO-SIMD128-NEXT: i32.extend8_s $push53=, $5 +; NO-SIMD128-NEXT: i32.xor $push11=, $13, $pop85 +; NO-SIMD128-NEXT: i32.sub $push12=, $pop11, $16 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop12 +; NO-SIMD128-NEXT: i32.extend8_s $push13=, $12 ; NO-SIMD128-NEXT: i32.const $push84=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push83=, $pop53, $pop84 +; NO-SIMD128-NEXT: i32.shr_s $push83=, $pop13, $pop84 ; NO-SIMD128-NEXT: local.tee $push82=, $16=, $pop83 -; NO-SIMD128-NEXT: i32.xor $push54=, $5, $pop82 -; NO-SIMD128-NEXT: i32.sub $push55=, $pop54, $16 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop55 -; NO-SIMD128-NEXT: i32.const $push59=, 3 -; NO-SIMD128-NEXT: i32.add $push60=, $0, $pop59 -; NO-SIMD128-NEXT: i32.extend8_s $push56=, $4 +; NO-SIMD128-NEXT: i32.xor $push14=, $12, $pop82 +; NO-SIMD128-NEXT: i32.sub $push15=, $pop14, $16 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop15 +; NO-SIMD128-NEXT: i32.extend8_s $push16=, $11 ; NO-SIMD128-NEXT: i32.const $push81=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push80=, $pop56, $pop81 +; NO-SIMD128-NEXT: i32.shr_s $push80=, $pop16, $pop81 ; NO-SIMD128-NEXT: local.tee $push79=, $16=, $pop80 -; NO-SIMD128-NEXT: i32.xor $push57=, $4, $pop79 -; NO-SIMD128-NEXT: i32.sub $push58=, $pop57, $16 -; NO-SIMD128-NEXT: i32.store8 0($pop60), $pop58 -; NO-SIMD128-NEXT: i32.extend8_s $push61=, $3 +; NO-SIMD128-NEXT: i32.xor $push17=, $11, 
$pop79 +; NO-SIMD128-NEXT: i32.sub $push18=, $pop17, $16 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop18 +; NO-SIMD128-NEXT: i32.extend8_s $push19=, $10 ; NO-SIMD128-NEXT: i32.const $push78=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push77=, $pop61, $pop78 +; NO-SIMD128-NEXT: i32.shr_s $push77=, $pop19, $pop78 ; NO-SIMD128-NEXT: local.tee $push76=, $16=, $pop77 -; NO-SIMD128-NEXT: i32.xor $push62=, $3, $pop76 -; NO-SIMD128-NEXT: i32.sub $push63=, $pop62, $16 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop63 -; NO-SIMD128-NEXT: i32.extend8_s $push64=, $2 +; NO-SIMD128-NEXT: i32.xor $push20=, $10, $pop76 +; NO-SIMD128-NEXT: i32.sub $push21=, $pop20, $16 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop21 +; NO-SIMD128-NEXT: i32.extend8_s $push22=, $9 ; NO-SIMD128-NEXT: i32.const $push75=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push74=, $pop64, $pop75 +; NO-SIMD128-NEXT: i32.shr_s $push74=, $pop22, $pop75 ; NO-SIMD128-NEXT: local.tee $push73=, $16=, $pop74 -; NO-SIMD128-NEXT: i32.xor $push65=, $2, $pop73 -; NO-SIMD128-NEXT: i32.sub $push66=, $pop65, $16 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop66 -; NO-SIMD128-NEXT: i32.extend8_s $push67=, $1 +; NO-SIMD128-NEXT: i32.xor $push23=, $9, $pop73 +; NO-SIMD128-NEXT: i32.sub $push24=, $pop23, $16 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop24 +; NO-SIMD128-NEXT: i32.extend8_s $push25=, $8 ; NO-SIMD128-NEXT: i32.const $push72=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push71=, $pop67, $pop72 +; NO-SIMD128-NEXT: i32.shr_s $push71=, $pop25, $pop72 ; NO-SIMD128-NEXT: local.tee $push70=, $16=, $pop71 -; NO-SIMD128-NEXT: i32.xor $push68=, $1, $pop70 -; NO-SIMD128-NEXT: i32.sub $push69=, $pop68, $16 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop69 +; NO-SIMD128-NEXT: i32.xor $push26=, $8, $pop70 +; NO-SIMD128-NEXT: i32.sub $push27=, $pop26, $16 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop27 +; NO-SIMD128-NEXT: i32.extend8_s $push28=, $7 +; NO-SIMD128-NEXT: i32.const $push69=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push68=, $pop28, $pop69 +; NO-SIMD128-NEXT: local.tee $push67=, 
$16=, $pop68 +; NO-SIMD128-NEXT: i32.xor $push29=, $7, $pop67 +; NO-SIMD128-NEXT: i32.sub $push30=, $pop29, $16 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop30 +; NO-SIMD128-NEXT: i32.extend8_s $push31=, $6 +; NO-SIMD128-NEXT: i32.const $push66=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push65=, $pop31, $pop66 +; NO-SIMD128-NEXT: local.tee $push64=, $16=, $pop65 +; NO-SIMD128-NEXT: i32.xor $push32=, $6, $pop64 +; NO-SIMD128-NEXT: i32.sub $push33=, $pop32, $16 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop33 +; NO-SIMD128-NEXT: i32.extend8_s $push34=, $5 +; NO-SIMD128-NEXT: i32.const $push63=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push62=, $pop34, $pop63 +; NO-SIMD128-NEXT: local.tee $push61=, $16=, $pop62 +; NO-SIMD128-NEXT: i32.xor $push35=, $5, $pop61 +; NO-SIMD128-NEXT: i32.sub $push36=, $pop35, $16 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop36 +; NO-SIMD128-NEXT: i32.extend8_s $push37=, $4 +; NO-SIMD128-NEXT: i32.const $push60=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push59=, $pop37, $pop60 +; NO-SIMD128-NEXT: local.tee $push58=, $16=, $pop59 +; NO-SIMD128-NEXT: i32.xor $push38=, $4, $pop58 +; NO-SIMD128-NEXT: i32.sub $push39=, $pop38, $16 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop39 +; NO-SIMD128-NEXT: i32.extend8_s $push40=, $3 +; NO-SIMD128-NEXT: i32.const $push57=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push56=, $pop40, $pop57 +; NO-SIMD128-NEXT: local.tee $push55=, $16=, $pop56 +; NO-SIMD128-NEXT: i32.xor $push41=, $3, $pop55 +; NO-SIMD128-NEXT: i32.sub $push42=, $pop41, $16 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop42 +; NO-SIMD128-NEXT: i32.extend8_s $push43=, $2 +; NO-SIMD128-NEXT: i32.const $push54=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push53=, $pop43, $pop54 +; NO-SIMD128-NEXT: local.tee $push52=, $16=, $pop53 +; NO-SIMD128-NEXT: i32.xor $push44=, $2, $pop52 +; NO-SIMD128-NEXT: i32.sub $push45=, $pop44, $16 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop45 +; NO-SIMD128-NEXT: i32.extend8_s $push46=, $1 +; NO-SIMD128-NEXT: i32.const $push51=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push50=, $pop46, 
$pop51 +; NO-SIMD128-NEXT: local.tee $push49=, $16=, $pop50 +; NO-SIMD128-NEXT: i32.xor $push47=, $1, $pop49 +; NO-SIMD128-NEXT: i32.sub $push48=, $pop47, $16 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop48 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: abs_v16i8: @@ -2420,138 +2002,116 @@ define <16 x i8> @abs_v16i8(<16 x i8> %x) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push0=, $1 ; NO-SIMD128-FAST-NEXT: i32.const $push1=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push117=, $pop0, $pop1 -; NO-SIMD128-FAST-NEXT: local.tee $push116=, $17=, $pop117 -; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $1, $pop116 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push95=, $pop0, $pop1 +; NO-SIMD128-FAST-NEXT: local.tee $push94=, $17=, $pop95 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $1, $pop94 ; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop2, $17 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop3 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push4=, $2 -; NO-SIMD128-FAST-NEXT: i32.const $push115=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push114=, $pop4, $pop115 -; NO-SIMD128-FAST-NEXT: local.tee $push113=, $1=, $pop114 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop113 +; NO-SIMD128-FAST-NEXT: i32.const $push93=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push92=, $pop4, $pop93 +; NO-SIMD128-FAST-NEXT: local.tee $push91=, $1=, $pop92 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop91 ; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop5, $1 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push7=, $3 -; NO-SIMD128-FAST-NEXT: i32.const $push112=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push111=, $pop7, $pop112 -; NO-SIMD128-FAST-NEXT: local.tee $push110=, $2=, $pop111 -; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $3, $pop110 +; NO-SIMD128-FAST-NEXT: i32.const $push90=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push89=, $pop7, $pop90 +; NO-SIMD128-FAST-NEXT: local.tee $push88=, $2=, $pop89 +; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $3, $pop88 ; 
NO-SIMD128-FAST-NEXT: i32.sub $push9=, $pop8, $2 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push10=, $4 -; NO-SIMD128-FAST-NEXT: i32.const $push109=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push108=, $pop10, $pop109 -; NO-SIMD128-FAST-NEXT: local.tee $push107=, $3=, $pop108 -; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $4, $pop107 -; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $pop11, $3 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $5 -; NO-SIMD128-FAST-NEXT: i32.const $push106=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push105=, $pop15, $pop106 -; NO-SIMD128-FAST-NEXT: local.tee $push104=, $4=, $pop105 -; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $5, $pop104 -; NO-SIMD128-FAST-NEXT: i32.sub $push17=, $pop16, $4 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push18=, $6 -; NO-SIMD128-FAST-NEXT: i32.const $push103=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push102=, $pop18, $pop103 -; NO-SIMD128-FAST-NEXT: local.tee $push101=, $5=, $pop102 -; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $6, $pop101 -; NO-SIMD128-FAST-NEXT: i32.sub $push20=, $pop19, $5 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push23=, $7 -; NO-SIMD128-FAST-NEXT: i32.const $push100=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push99=, $pop23, $pop100 -; NO-SIMD128-FAST-NEXT: local.tee $push98=, $6=, $pop99 -; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $7, $pop98 -; NO-SIMD128-FAST-NEXT: i32.sub $push25=, $pop24, $6 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push97=, 7 -; NO-SIMD128-FAST-NEXT: 
i32.add $push31=, $0, $pop97 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push28=, $8 -; NO-SIMD128-FAST-NEXT: i32.const $push96=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push95=, $pop28, $pop96 -; NO-SIMD128-FAST-NEXT: local.tee $push94=, $7=, $pop95 -; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $8, $pop94 -; NO-SIMD128-FAST-NEXT: i32.sub $push30=, $pop29, $7 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop30 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push32=, $9 -; NO-SIMD128-FAST-NEXT: i32.const $push93=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push92=, $pop32, $pop93 -; NO-SIMD128-FAST-NEXT: local.tee $push91=, $8=, $pop92 -; NO-SIMD128-FAST-NEXT: i32.xor $push33=, $9, $pop91 -; NO-SIMD128-FAST-NEXT: i32.sub $push34=, $pop33, $8 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push38=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push39=, $0, $pop38 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push35=, $10 -; NO-SIMD128-FAST-NEXT: i32.const $push90=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push89=, $pop35, $pop90 -; NO-SIMD128-FAST-NEXT: local.tee $push88=, $9=, $pop89 -; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $10, $pop88 -; NO-SIMD128-FAST-NEXT: i32.sub $push37=, $pop36, $9 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop39), $pop37 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push44=, $0, $pop43 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push40=, $11 ; NO-SIMD128-FAST-NEXT: i32.const $push87=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push86=, $pop40, $pop87 -; NO-SIMD128-FAST-NEXT: local.tee $push85=, $10=, $pop86 -; NO-SIMD128-FAST-NEXT: i32.xor $push41=, $11, $pop85 -; NO-SIMD128-FAST-NEXT: i32.sub $push42=, $pop41, $10 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop44), $pop42 -; NO-SIMD128-FAST-NEXT: i32.const $push48=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push49=, $0, $pop48 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push45=, $12 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push86=, $pop10, $pop87 +; NO-SIMD128-FAST-NEXT: local.tee 
$push85=, $3=, $pop86 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $4, $pop85 +; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $pop11, $3 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $5 ; NO-SIMD128-FAST-NEXT: i32.const $push84=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push83=, $pop45, $pop84 -; NO-SIMD128-FAST-NEXT: local.tee $push82=, $11=, $pop83 -; NO-SIMD128-FAST-NEXT: i32.xor $push46=, $12, $pop82 -; NO-SIMD128-FAST-NEXT: i32.sub $push47=, $pop46, $11 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop49), $pop47 -; NO-SIMD128-FAST-NEXT: i32.const $push53=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push54=, $0, $pop53 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push50=, $13 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push83=, $pop13, $pop84 +; NO-SIMD128-FAST-NEXT: local.tee $push82=, $4=, $pop83 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $5, $pop82 +; NO-SIMD128-FAST-NEXT: i32.sub $push15=, $pop14, $4 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push16=, $6 ; NO-SIMD128-FAST-NEXT: i32.const $push81=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push80=, $pop50, $pop81 -; NO-SIMD128-FAST-NEXT: local.tee $push79=, $12=, $pop80 -; NO-SIMD128-FAST-NEXT: i32.xor $push51=, $13, $pop79 -; NO-SIMD128-FAST-NEXT: i32.sub $push52=, $pop51, $12 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop54), $pop52 -; NO-SIMD128-FAST-NEXT: i32.const $push58=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push59=, $0, $pop58 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push55=, $14 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push80=, $pop16, $pop81 +; NO-SIMD128-FAST-NEXT: local.tee $push79=, $5=, $pop80 +; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $6, $pop79 +; NO-SIMD128-FAST-NEXT: i32.sub $push18=, $pop17, $5 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $7 ; NO-SIMD128-FAST-NEXT: i32.const $push78=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push77=, $pop55, $pop78 -; NO-SIMD128-FAST-NEXT: local.tee 
$push76=, $13=, $pop77 -; NO-SIMD128-FAST-NEXT: i32.xor $push56=, $14, $pop76 -; NO-SIMD128-FAST-NEXT: i32.sub $push57=, $pop56, $13 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop59), $pop57 -; NO-SIMD128-FAST-NEXT: i32.const $push63=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push64=, $0, $pop63 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push60=, $15 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push77=, $pop19, $pop78 +; NO-SIMD128-FAST-NEXT: local.tee $push76=, $6=, $pop77 +; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $7, $pop76 +; NO-SIMD128-FAST-NEXT: i32.sub $push21=, $pop20, $6 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push22=, $8 ; NO-SIMD128-FAST-NEXT: i32.const $push75=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push74=, $pop60, $pop75 -; NO-SIMD128-FAST-NEXT: local.tee $push73=, $14=, $pop74 -; NO-SIMD128-FAST-NEXT: i32.xor $push61=, $15, $pop73 -; NO-SIMD128-FAST-NEXT: i32.sub $push62=, $pop61, $14 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop64), $pop62 -; NO-SIMD128-FAST-NEXT: i32.const $push68=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push69=, $0, $pop68 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push65=, $16 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push74=, $pop22, $pop75 +; NO-SIMD128-FAST-NEXT: local.tee $push73=, $7=, $pop74 +; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $8, $pop73 +; NO-SIMD128-FAST-NEXT: i32.sub $push24=, $pop23, $7 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $9 ; NO-SIMD128-FAST-NEXT: i32.const $push72=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push71=, $pop65, $pop72 -; NO-SIMD128-FAST-NEXT: local.tee $push70=, $0=, $pop71 -; NO-SIMD128-FAST-NEXT: i32.xor $push66=, $16, $pop70 -; NO-SIMD128-FAST-NEXT: i32.sub $push67=, $pop66, $0 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop69), $pop67 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push71=, $pop25, $pop72 +; NO-SIMD128-FAST-NEXT: local.tee $push70=, $8=, $pop71 +; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $9, $pop70 +; 
NO-SIMD128-FAST-NEXT: i32.sub $push27=, $pop26, $8 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop27 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push28=, $10 +; NO-SIMD128-FAST-NEXT: i32.const $push69=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push68=, $pop28, $pop69 +; NO-SIMD128-FAST-NEXT: local.tee $push67=, $9=, $pop68 +; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $10, $pop67 +; NO-SIMD128-FAST-NEXT: i32.sub $push30=, $pop29, $9 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop30 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push31=, $11 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push65=, $pop31, $pop66 +; NO-SIMD128-FAST-NEXT: local.tee $push64=, $10=, $pop65 +; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $11, $pop64 +; NO-SIMD128-FAST-NEXT: i32.sub $push33=, $pop32, $10 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop33 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push34=, $12 +; NO-SIMD128-FAST-NEXT: i32.const $push63=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push62=, $pop34, $pop63 +; NO-SIMD128-FAST-NEXT: local.tee $push61=, $11=, $pop62 +; NO-SIMD128-FAST-NEXT: i32.xor $push35=, $12, $pop61 +; NO-SIMD128-FAST-NEXT: i32.sub $push36=, $pop35, $11 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop36 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push37=, $13 +; NO-SIMD128-FAST-NEXT: i32.const $push60=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push59=, $pop37, $pop60 +; NO-SIMD128-FAST-NEXT: local.tee $push58=, $12=, $pop59 +; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $13, $pop58 +; NO-SIMD128-FAST-NEXT: i32.sub $push39=, $pop38, $12 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop39 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push40=, $14 +; NO-SIMD128-FAST-NEXT: i32.const $push57=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push56=, $pop40, $pop57 +; NO-SIMD128-FAST-NEXT: local.tee $push55=, $13=, $pop56 +; NO-SIMD128-FAST-NEXT: i32.xor $push41=, $14, $pop55 +; NO-SIMD128-FAST-NEXT: i32.sub $push42=, $pop41, $13 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop42 +; 
NO-SIMD128-FAST-NEXT: i32.extend8_s $push43=, $15 +; NO-SIMD128-FAST-NEXT: i32.const $push54=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push53=, $pop43, $pop54 +; NO-SIMD128-FAST-NEXT: local.tee $push52=, $14=, $pop53 +; NO-SIMD128-FAST-NEXT: i32.xor $push44=, $15, $pop52 +; NO-SIMD128-FAST-NEXT: i32.sub $push45=, $pop44, $14 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop45 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push46=, $16 +; NO-SIMD128-FAST-NEXT: i32.const $push51=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push50=, $pop46, $pop51 +; NO-SIMD128-FAST-NEXT: local.tee $push49=, $15=, $pop50 +; NO-SIMD128-FAST-NEXT: i32.xor $push47=, $16, $pop49 +; NO-SIMD128-FAST-NEXT: i32.sub $push48=, $pop47, $15 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop48 ; NO-SIMD128-FAST-NEXT: return %a = sub <16 x i8> zeroinitializer, %x %b = icmp slt <16 x i8> %x, zeroinitializer @@ -2576,75 +2136,53 @@ define <16 x i8> @neg_v16i8(<16 x i8> %x) { ; NO-SIMD128: .functype neg_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 0 -; NO-SIMD128-NEXT: i32.sub $push1=, $pop0, $9 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop1 -; NO-SIMD128-NEXT: i32.const $push53=, 0 -; NO-SIMD128-NEXT: i32.sub $push2=, $pop53, $5 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push52=, 0 -; NO-SIMD128-NEXT: i32.sub $push3=, $pop52, $3 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push51=, 0 -; NO-SIMD128-NEXT: i32.sub $push4=, $pop51, $2 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push50=, 0 -; NO-SIMD128-NEXT: i32.sub $push5=, $pop50, $1 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop5 -; NO-SIMD128-NEXT: i32.const $push7=, 15 -; NO-SIMD128-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-NEXT: i32.const $push49=, 0 -; NO-SIMD128-NEXT: i32.sub $push6=, $pop49, $16 -; NO-SIMD128-NEXT: i32.store8 0($pop8), $pop6 -; 
NO-SIMD128-NEXT: i32.const $push10=, 14 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.const $push48=, 0 -; NO-SIMD128-NEXT: i32.sub $push9=, $pop48, $15 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9 -; NO-SIMD128-NEXT: i32.const $push13=, 13 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.const $push47=, 0 -; NO-SIMD128-NEXT: i32.sub $push12=, $pop47, $14 -; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.const $push16=, 12 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.const $push46=, 0 -; NO-SIMD128-NEXT: i32.sub $push15=, $pop46, $13 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.const $push19=, 11 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.const $push45=, 0 -; NO-SIMD128-NEXT: i32.sub $push18=, $pop45, $12 -; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push22=, 10 -; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22 -; NO-SIMD128-NEXT: i32.const $push44=, 0 -; NO-SIMD128-NEXT: i32.sub $push21=, $pop44, $11 -; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21 -; NO-SIMD128-NEXT: i32.const $push25=, 9 -; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 -; NO-SIMD128-NEXT: i32.const $push43=, 0 -; NO-SIMD128-NEXT: i32.sub $push24=, $pop43, $10 -; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 -; NO-SIMD128-NEXT: i32.const $push28=, 7 -; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28 -; NO-SIMD128-NEXT: i32.const $push42=, 0 -; NO-SIMD128-NEXT: i32.sub $push27=, $pop42, $8 -; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27 -; NO-SIMD128-NEXT: i32.const $push31=, 6 -; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31 -; NO-SIMD128-NEXT: i32.const $push41=, 0 -; NO-SIMD128-NEXT: i32.sub $push30=, $pop41, $7 -; NO-SIMD128-NEXT: i32.store8 0($pop32), $pop30 -; NO-SIMD128-NEXT: i32.const $push34=, 5 -; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 -; NO-SIMD128-NEXT: i32.const $push40=, 0 -; 
NO-SIMD128-NEXT: i32.sub $push33=, $pop40, $6 -; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 -; NO-SIMD128-NEXT: i32.const $push37=, 3 -; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 -; NO-SIMD128-NEXT: i32.const $push39=, 0 -; NO-SIMD128-NEXT: i32.sub $push36=, $pop39, $4 -; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 +; NO-SIMD128-NEXT: i32.sub $push1=, $pop0, $16 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop1 +; NO-SIMD128-NEXT: i32.const $push31=, 0 +; NO-SIMD128-NEXT: i32.sub $push2=, $pop31, $15 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push30=, 0 +; NO-SIMD128-NEXT: i32.sub $push3=, $pop30, $14 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push29=, 0 +; NO-SIMD128-NEXT: i32.sub $push4=, $pop29, $13 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push28=, 0 +; NO-SIMD128-NEXT: i32.sub $push5=, $pop28, $12 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop5 +; NO-SIMD128-NEXT: i32.const $push27=, 0 +; NO-SIMD128-NEXT: i32.sub $push6=, $pop27, $11 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push26=, 0 +; NO-SIMD128-NEXT: i32.sub $push7=, $pop26, $10 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop7 +; NO-SIMD128-NEXT: i32.const $push25=, 0 +; NO-SIMD128-NEXT: i32.sub $push8=, $pop25, $9 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push24=, 0 +; NO-SIMD128-NEXT: i32.sub $push9=, $pop24, $8 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop9 +; NO-SIMD128-NEXT: i32.const $push23=, 0 +; NO-SIMD128-NEXT: i32.sub $push10=, $pop23, $7 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push22=, 0 +; NO-SIMD128-NEXT: i32.sub $push11=, $pop22, $6 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop11 +; NO-SIMD128-NEXT: i32.const $push21=, 0 +; NO-SIMD128-NEXT: i32.sub $push12=, $pop21, $5 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push20=, 0 +; NO-SIMD128-NEXT: i32.sub $push13=, $pop20, 
$4 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop13 +; NO-SIMD128-NEXT: i32.const $push19=, 0 +; NO-SIMD128-NEXT: i32.sub $push14=, $pop19, $3 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop14 +; NO-SIMD128-NEXT: i32.const $push18=, 0 +; NO-SIMD128-NEXT: i32.sub $push15=, $pop18, $2 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop15 +; NO-SIMD128-NEXT: i32.const $push17=, 0 +; NO-SIMD128-NEXT: i32.sub $push16=, $pop17, $1 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop16 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: neg_v16i8: @@ -2653,73 +2191,51 @@ define <16 x i8> @neg_v16i8(<16 x i8> %x) { ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 0 ; NO-SIMD128-FAST-NEXT: i32.sub $push1=, $pop0, $1 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: i32.const $push53=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop53, $2 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop31, $2 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push52=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop52, $3 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop30, $3 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push51=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop51, $4 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop5), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push50=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push7=, $pop50, $5 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push10=, $pop49, $6 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop9), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push11=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 -; 
NO-SIMD128-FAST-NEXT: i32.const $push48=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push13=, $pop48, $7 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop12), $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push14=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push16=, $pop47, $8 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop15), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push17=, $pop46, $9 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push18=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push20=, $pop45, $10 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop19), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push23=, $pop44, $11 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop23 -; NO-SIMD128-FAST-NEXT: i32.const $push24=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push26=, $pop43, $12 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop25), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.const $push42=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push29=, $pop42, $13 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop29 -; NO-SIMD128-FAST-NEXT: i32.const $push30=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push32=, $pop41, $14 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.const $push40=, 0 -; 
NO-SIMD128-FAST-NEXT: i32.sub $push35=, $pop40, $15 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push36=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push38=, $pop39, $16 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push4=, $pop29, $4 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push28=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $pop28, $5 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop27, $6 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push7=, $pop26, $7 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push25=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push8=, $pop25, $8 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push24=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $pop24, $9 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push10=, $pop23, $10 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push11=, $pop22, $11 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $pop21, $12 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push13=, $pop20, $13 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push14=, $pop19, $14 +; NO-SIMD128-FAST-NEXT: 
i32.store8 13($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push15=, $pop18, $15 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push16=, $pop17, $16 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop16 ; NO-SIMD128-FAST-NEXT: return %a = sub <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, @@ -2744,124 +2260,80 @@ define <16 x i8> @shl_v16i8(<16 x i8> %v, i8 %x) { ; NO-SIMD128: .functype shl_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 255 -; NO-SIMD128-NEXT: i32.and $push40=, $17, $pop0 -; NO-SIMD128-NEXT: local.tee $push39=, $17=, $pop40 -; NO-SIMD128-NEXT: i32.shl $push1=, $9, $pop39 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop1 -; NO-SIMD128-NEXT: i32.shl $push2=, $5, $17 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop2 -; NO-SIMD128-NEXT: i32.shl $push3=, $3, $17 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop3 -; NO-SIMD128-NEXT: i32.shl $push4=, $2, $17 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop4 -; NO-SIMD128-NEXT: i32.shl $push5=, $1, $17 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop5 -; NO-SIMD128-NEXT: i32.const $push7=, 15 -; NO-SIMD128-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-NEXT: i32.shl $push6=, $16, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop8), $pop6 -; NO-SIMD128-NEXT: i32.const $push10=, 14 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.shl $push9=, $15, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9 -; NO-SIMD128-NEXT: i32.const $push13=, 13 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.shl $push12=, $14, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.const $push16=, 12 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.shl $push15=, $13, 
$17 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.const $push19=, 11 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.shl $push18=, $12, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push22=, 10 -; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22 -; NO-SIMD128-NEXT: i32.shl $push21=, $11, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21 -; NO-SIMD128-NEXT: i32.const $push25=, 9 -; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 -; NO-SIMD128-NEXT: i32.shl $push24=, $10, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 -; NO-SIMD128-NEXT: i32.const $push28=, 7 -; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28 -; NO-SIMD128-NEXT: i32.shl $push27=, $8, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27 -; NO-SIMD128-NEXT: i32.const $push31=, 6 -; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31 -; NO-SIMD128-NEXT: i32.shl $push30=, $7, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop32), $pop30 -; NO-SIMD128-NEXT: i32.const $push34=, 5 -; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 -; NO-SIMD128-NEXT: i32.shl $push33=, $6, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 -; NO-SIMD128-NEXT: i32.const $push37=, 3 -; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 -; NO-SIMD128-NEXT: i32.shl $push36=, $4, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 +; NO-SIMD128-NEXT: i32.and $push18=, $17, $pop0 +; NO-SIMD128-NEXT: local.tee $push17=, $17=, $pop18 +; NO-SIMD128-NEXT: i32.shl $push1=, $16, $pop17 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop1 +; NO-SIMD128-NEXT: i32.shl $push2=, $15, $17 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop2 +; NO-SIMD128-NEXT: i32.shl $push3=, $14, $17 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop3 +; NO-SIMD128-NEXT: i32.shl $push4=, $13, $17 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop4 +; NO-SIMD128-NEXT: i32.shl $push5=, $12, $17 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop5 +; NO-SIMD128-NEXT: i32.shl $push6=, $11, $17 +; NO-SIMD128-NEXT: i32.store8 
10($0), $pop6 +; NO-SIMD128-NEXT: i32.shl $push7=, $10, $17 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop7 +; NO-SIMD128-NEXT: i32.shl $push8=, $9, $17 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop8 +; NO-SIMD128-NEXT: i32.shl $push9=, $8, $17 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop9 +; NO-SIMD128-NEXT: i32.shl $push10=, $7, $17 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop10 +; NO-SIMD128-NEXT: i32.shl $push11=, $6, $17 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop11 +; NO-SIMD128-NEXT: i32.shl $push12=, $5, $17 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop12 +; NO-SIMD128-NEXT: i32.shl $push13=, $4, $17 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop13 +; NO-SIMD128-NEXT: i32.shl $push14=, $3, $17 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop14 +; NO-SIMD128-NEXT: i32.shl $push15=, $2, $17 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop15 +; NO-SIMD128-NEXT: i32.shl $push16=, $1, $17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop16 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shl_v16i8: ; NO-SIMD128-FAST: .functype shl_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push40=, $17, $pop0 -; NO-SIMD128-FAST-NEXT: local.tee $push39=, $17=, $pop40 -; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $2, $pop39 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $17, $pop0 +; NO-SIMD128-FAST-NEXT: local.tee $push17=, $17=, $pop18 +; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $2, $pop17 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $17 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2 ; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $17 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop5), $pop6 -; NO-SIMD128-FAST-NEXT: 
i32.shl $push7=, $5, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $6, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop9), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push11=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $7, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop12), $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push14=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $8, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop15), $pop16 -; NO-SIMD128-FAST-NEXT: i32.shl $push17=, $9, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push18=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-FAST-NEXT: i32.shl $push20=, $10, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop19), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.shl $push23=, $11, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop23 -; NO-SIMD128-FAST-NEXT: i32.const $push24=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-FAST-NEXT: i32.shl $push26=, $12, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop25), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.shl $push29=, $13, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop29 -; NO-SIMD128-FAST-NEXT: i32.const $push30=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-FAST-NEXT: i32.shl $push32=, $14, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.shl $push35=, $15, $17 -; 
NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push36=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-FAST-NEXT: i32.shl $push38=, $16, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop38 +; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $4, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $5, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $6, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $7, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $8, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.shl $push9=, $9, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $10, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.shl $push11=, $11, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $12, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $13, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.shl $push14=, $14, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.shl $push15=, $15, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $16, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop16 ; NO-SIMD128-FAST-NEXT: return %t = insertelement <16 x i8> undef, i8 %x, i32 0 %s = shufflevector <16 x i8> %t, <16 x i8> undef, @@ -2890,75 +2362,53 @@ define <16 x i8> @shl_const_v16i8(<16 x i8> %v) { ; NO-SIMD128: .functype shl_const_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 5 -; NO-SIMD128-NEXT: 
i32.shl $push1=, $9, $pop0 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop1 -; NO-SIMD128-NEXT: i32.const $push53=, 5 -; NO-SIMD128-NEXT: i32.shl $push2=, $5, $pop53 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push52=, 5 -; NO-SIMD128-NEXT: i32.shl $push3=, $3, $pop52 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push51=, 5 -; NO-SIMD128-NEXT: i32.shl $push4=, $2, $pop51 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push50=, 5 -; NO-SIMD128-NEXT: i32.shl $push5=, $1, $pop50 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop5 -; NO-SIMD128-NEXT: i32.const $push7=, 15 -; NO-SIMD128-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-NEXT: i32.const $push49=, 5 -; NO-SIMD128-NEXT: i32.shl $push6=, $16, $pop49 -; NO-SIMD128-NEXT: i32.store8 0($pop8), $pop6 -; NO-SIMD128-NEXT: i32.const $push10=, 14 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.const $push48=, 5 -; NO-SIMD128-NEXT: i32.shl $push9=, $15, $pop48 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9 -; NO-SIMD128-NEXT: i32.const $push13=, 13 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.const $push47=, 5 -; NO-SIMD128-NEXT: i32.shl $push12=, $14, $pop47 -; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.const $push16=, 12 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.const $push46=, 5 -; NO-SIMD128-NEXT: i32.shl $push15=, $13, $pop46 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.const $push19=, 11 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.const $push45=, 5 -; NO-SIMD128-NEXT: i32.shl $push18=, $12, $pop45 -; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push22=, 10 -; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22 -; NO-SIMD128-NEXT: i32.const $push44=, 5 -; NO-SIMD128-NEXT: i32.shl $push21=, $11, $pop44 -; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21 -; 
NO-SIMD128-NEXT: i32.const $push25=, 9 -; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 -; NO-SIMD128-NEXT: i32.const $push43=, 5 -; NO-SIMD128-NEXT: i32.shl $push24=, $10, $pop43 -; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 -; NO-SIMD128-NEXT: i32.const $push28=, 7 -; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28 -; NO-SIMD128-NEXT: i32.const $push42=, 5 -; NO-SIMD128-NEXT: i32.shl $push27=, $8, $pop42 -; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27 -; NO-SIMD128-NEXT: i32.const $push31=, 6 -; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31 -; NO-SIMD128-NEXT: i32.const $push41=, 5 -; NO-SIMD128-NEXT: i32.shl $push30=, $7, $pop41 -; NO-SIMD128-NEXT: i32.store8 0($pop32), $pop30 -; NO-SIMD128-NEXT: i32.const $push40=, 5 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop40 -; NO-SIMD128-NEXT: i32.const $push39=, 5 -; NO-SIMD128-NEXT: i32.shl $push33=, $6, $pop39 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop33 -; NO-SIMD128-NEXT: i32.const $push36=, 3 -; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-NEXT: i32.const $push38=, 5 -; NO-SIMD128-NEXT: i32.shl $push35=, $4, $pop38 -; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35 +; NO-SIMD128-NEXT: i32.shl $push1=, $16, $pop0 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop1 +; NO-SIMD128-NEXT: i32.const $push31=, 5 +; NO-SIMD128-NEXT: i32.shl $push2=, $15, $pop31 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push30=, 5 +; NO-SIMD128-NEXT: i32.shl $push3=, $14, $pop30 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push29=, 5 +; NO-SIMD128-NEXT: i32.shl $push4=, $13, $pop29 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push28=, 5 +; NO-SIMD128-NEXT: i32.shl $push5=, $12, $pop28 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop5 +; NO-SIMD128-NEXT: i32.const $push27=, 5 +; NO-SIMD128-NEXT: i32.shl $push6=, $11, $pop27 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push26=, 5 +; NO-SIMD128-NEXT: i32.shl 
$push7=, $10, $pop26 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop7 +; NO-SIMD128-NEXT: i32.const $push25=, 5 +; NO-SIMD128-NEXT: i32.shl $push8=, $9, $pop25 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push24=, 5 +; NO-SIMD128-NEXT: i32.shl $push9=, $8, $pop24 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop9 +; NO-SIMD128-NEXT: i32.const $push23=, 5 +; NO-SIMD128-NEXT: i32.shl $push10=, $7, $pop23 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push22=, 5 +; NO-SIMD128-NEXT: i32.shl $push11=, $6, $pop22 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop11 +; NO-SIMD128-NEXT: i32.const $push21=, 5 +; NO-SIMD128-NEXT: i32.shl $push12=, $5, $pop21 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push20=, 5 +; NO-SIMD128-NEXT: i32.shl $push13=, $4, $pop20 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop13 +; NO-SIMD128-NEXT: i32.const $push19=, 5 +; NO-SIMD128-NEXT: i32.shl $push14=, $3, $pop19 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop14 +; NO-SIMD128-NEXT: i32.const $push18=, 5 +; NO-SIMD128-NEXT: i32.shl $push15=, $2, $pop18 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop15 +; NO-SIMD128-NEXT: i32.const $push17=, 5 +; NO-SIMD128-NEXT: i32.shl $push16=, $1, $pop17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop16 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shl_const_v16i8: @@ -2967,73 +2417,51 @@ define <16 x i8> @shl_const_v16i8(<16 x i8> %v) { ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 5 ; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $1, $pop0 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: i32.const $push53=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop53 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop31 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push52=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop52 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 5 +; NO-SIMD128-FAST-NEXT: 
i32.shl $push3=, $3, $pop30 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push51=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $pop51 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop5), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push50=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $5, $pop50 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop49 -; NO-SIMD128-FAST-NEXT: i32.const $push48=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push9=, $6, $pop48 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $7, $pop47 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push15=, $8, $pop46 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $9, $pop45 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push19=, $10, $pop44 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push22=, $11, $pop43 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, 
$pop23 -; NO-SIMD128-FAST-NEXT: i32.const $push42=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push25=, $12, $pop42 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push28=, $13, $pop41 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-FAST-NEXT: i32.const $push40=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push31=, $14, $pop40 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push34=, $15, $pop39 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push38=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push37=, $16, $pop38 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $4, $pop29 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push28=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $5, $pop28 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $6, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $7, $pop26 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push25=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $8, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push24=, 5 +; 
NO-SIMD128-FAST-NEXT: i32.shl $push9=, $9, $pop24 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $10, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push11=, $11, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $12, $pop21 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $13, $pop20 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push14=, $14, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push15=, $15, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $16, $pop17 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop16 ; NO-SIMD128-FAST-NEXT: return %a = shl <16 x i8> %v, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, @@ -3248,91 +2676,69 @@ define <16 x i8> @shl_vec_v16i8(<16 x i8> %v, <16 x i8> %x) { ; NO-SIMD128: .functype shl_vec_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 255 -; NO-SIMD128-NEXT: i32.and $push1=, $25, $pop0 -; NO-SIMD128-NEXT: i32.shl $push2=, $9, $pop1 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push69=, 255 -; NO-SIMD128-NEXT: i32.and $push3=, $21, $pop69 -; NO-SIMD128-NEXT: i32.shl $push4=, $5, $pop3 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop4 -; NO-SIMD128-NEXT: i32.const 
$push68=, 255 -; NO-SIMD128-NEXT: i32.and $push5=, $19, $pop68 -; NO-SIMD128-NEXT: i32.shl $push6=, $3, $pop5 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push67=, 255 -; NO-SIMD128-NEXT: i32.and $push7=, $18, $pop67 -; NO-SIMD128-NEXT: i32.shl $push8=, $2, $pop7 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop8 -; NO-SIMD128-NEXT: i32.const $push66=, 255 -; NO-SIMD128-NEXT: i32.and $push9=, $17, $pop66 -; NO-SIMD128-NEXT: i32.shl $push10=, $1, $pop9 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop10 -; NO-SIMD128-NEXT: i32.const $push13=, 15 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.const $push65=, 255 -; NO-SIMD128-NEXT: i32.and $push11=, $32, $pop65 -; NO-SIMD128-NEXT: i32.shl $push12=, $16, $pop11 -; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.const $push17=, 14 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.const $push64=, 255 -; NO-SIMD128-NEXT: i32.and $push15=, $31, $pop64 -; NO-SIMD128-NEXT: i32.shl $push16=, $15, $pop15 -; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push21=, 13 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.const $push63=, 255 -; NO-SIMD128-NEXT: i32.and $push19=, $30, $pop63 -; NO-SIMD128-NEXT: i32.shl $push20=, $14, $pop19 -; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push25=, 12 -; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 -; NO-SIMD128-NEXT: i32.const $push62=, 255 -; NO-SIMD128-NEXT: i32.and $push23=, $29, $pop62 -; NO-SIMD128-NEXT: i32.shl $push24=, $13, $pop23 -; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 -; NO-SIMD128-NEXT: i32.const $push29=, 11 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.const $push61=, 255 -; NO-SIMD128-NEXT: i32.and $push27=, $28, $pop61 -; NO-SIMD128-NEXT: i32.shl $push28=, $12, $pop27 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push33=, 10 -; 
NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.const $push60=, 255 -; NO-SIMD128-NEXT: i32.and $push31=, $27, $pop60 -; NO-SIMD128-NEXT: i32.shl $push32=, $11, $pop31 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.const $push37=, 9 -; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 -; NO-SIMD128-NEXT: i32.const $push59=, 255 -; NO-SIMD128-NEXT: i32.and $push35=, $26, $pop59 -; NO-SIMD128-NEXT: i32.shl $push36=, $10, $pop35 -; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 -; NO-SIMD128-NEXT: i32.const $push41=, 7 -; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-NEXT: i32.const $push58=, 255 -; NO-SIMD128-NEXT: i32.and $push39=, $24, $pop58 -; NO-SIMD128-NEXT: i32.shl $push40=, $8, $pop39 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-NEXT: i32.const $push45=, 6 -; NO-SIMD128-NEXT: i32.add $push46=, $0, $pop45 -; NO-SIMD128-NEXT: i32.const $push57=, 255 -; NO-SIMD128-NEXT: i32.and $push43=, $23, $pop57 -; NO-SIMD128-NEXT: i32.shl $push44=, $7, $pop43 -; NO-SIMD128-NEXT: i32.store8 0($pop46), $pop44 -; NO-SIMD128-NEXT: i32.const $push49=, 5 -; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-NEXT: i32.const $push56=, 255 -; NO-SIMD128-NEXT: i32.and $push47=, $22, $pop56 -; NO-SIMD128-NEXT: i32.shl $push48=, $6, $pop47 -; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-NEXT: i32.const $push53=, 3 -; NO-SIMD128-NEXT: i32.add $push54=, $0, $pop53 -; NO-SIMD128-NEXT: i32.const $push55=, 255 -; NO-SIMD128-NEXT: i32.and $push51=, $20, $pop55 -; NO-SIMD128-NEXT: i32.shl $push52=, $4, $pop51 -; NO-SIMD128-NEXT: i32.store8 0($pop54), $pop52 +; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop0 +; NO-SIMD128-NEXT: i32.shl $push2=, $16, $pop1 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push47=, 255 +; NO-SIMD128-NEXT: i32.and $push3=, $31, $pop47 +; NO-SIMD128-NEXT: i32.shl $push4=, $15, $pop3 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop4 +; 
NO-SIMD128-NEXT: i32.const $push46=, 255 +; NO-SIMD128-NEXT: i32.and $push5=, $30, $pop46 +; NO-SIMD128-NEXT: i32.shl $push6=, $14, $pop5 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push45=, 255 +; NO-SIMD128-NEXT: i32.and $push7=, $29, $pop45 +; NO-SIMD128-NEXT: i32.shl $push8=, $13, $pop7 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push44=, 255 +; NO-SIMD128-NEXT: i32.and $push9=, $28, $pop44 +; NO-SIMD128-NEXT: i32.shl $push10=, $12, $pop9 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push43=, 255 +; NO-SIMD128-NEXT: i32.and $push11=, $27, $pop43 +; NO-SIMD128-NEXT: i32.shl $push12=, $11, $pop11 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push42=, 255 +; NO-SIMD128-NEXT: i32.and $push13=, $26, $pop42 +; NO-SIMD128-NEXT: i32.shl $push14=, $10, $pop13 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop14 +; NO-SIMD128-NEXT: i32.const $push41=, 255 +; NO-SIMD128-NEXT: i32.and $push15=, $25, $pop41 +; NO-SIMD128-NEXT: i32.shl $push16=, $9, $pop15 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop16 +; NO-SIMD128-NEXT: i32.const $push40=, 255 +; NO-SIMD128-NEXT: i32.and $push17=, $24, $pop40 +; NO-SIMD128-NEXT: i32.shl $push18=, $8, $pop17 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop18 +; NO-SIMD128-NEXT: i32.const $push39=, 255 +; NO-SIMD128-NEXT: i32.and $push19=, $23, $pop39 +; NO-SIMD128-NEXT: i32.shl $push20=, $7, $pop19 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop20 +; NO-SIMD128-NEXT: i32.const $push38=, 255 +; NO-SIMD128-NEXT: i32.and $push21=, $22, $pop38 +; NO-SIMD128-NEXT: i32.shl $push22=, $6, $pop21 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop22 +; NO-SIMD128-NEXT: i32.const $push37=, 255 +; NO-SIMD128-NEXT: i32.and $push23=, $21, $pop37 +; NO-SIMD128-NEXT: i32.shl $push24=, $5, $pop23 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop24 +; NO-SIMD128-NEXT: i32.const $push36=, 255 +; NO-SIMD128-NEXT: i32.and $push25=, $20, $pop36 +; NO-SIMD128-NEXT: i32.shl 
$push26=, $4, $pop25 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop26 +; NO-SIMD128-NEXT: i32.const $push35=, 255 +; NO-SIMD128-NEXT: i32.and $push27=, $19, $pop35 +; NO-SIMD128-NEXT: i32.shl $push28=, $3, $pop27 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop28 +; NO-SIMD128-NEXT: i32.const $push34=, 255 +; NO-SIMD128-NEXT: i32.and $push29=, $18, $pop34 +; NO-SIMD128-NEXT: i32.shl $push30=, $2, $pop29 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop30 +; NO-SIMD128-NEXT: i32.const $push33=, 255 +; NO-SIMD128-NEXT: i32.and $push31=, $17, $pop33 +; NO-SIMD128-NEXT: i32.shl $push32=, $1, $pop31 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop32 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shl_vec_v16i8: @@ -3342,88 +2748,66 @@ define <16 x i8> @shl_vec_v16i8(<16 x i8> %v, <16 x i8> %x) { ; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop0 ; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push69=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push3=, $18, $pop69 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $18, $pop47 ; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $2, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push68=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $19, $pop68 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $19, $pop46 ; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $3, $pop5 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push67=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push9=, $20, $pop67 -; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $4, $pop9 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push66=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push11=, $21, $pop66 -; NO-SIMD128-FAST-NEXT: i32.shl $push12=, 
$5, $pop11 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push65=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $22, $pop65 -; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $6, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push64=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $23, $pop64 -; NO-SIMD128-FAST-NEXT: i32.shl $push20=, $7, $pop19 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push63=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $24, $pop63 -; NO-SIMD128-FAST-NEXT: i32.shl $push24=, $8, $pop23 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop24 -; NO-SIMD128-FAST-NEXT: i32.const $push62=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $25, $pop62 -; NO-SIMD128-FAST-NEXT: i32.shl $push26=, $9, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.const $push61=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $26, $pop61 -; NO-SIMD128-FAST-NEXT: i32.shl $push30=, $10, $pop29 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop30 -; NO-SIMD128-FAST-NEXT: i32.const $push31=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push60=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push33=, $27, $pop60 -; NO-SIMD128-FAST-NEXT: i32.shl $push34=, $11, $pop33 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop32), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push59=, 255 -; 
NO-SIMD128-FAST-NEXT: i32.and $push37=, $28, $pop59 -; NO-SIMD128-FAST-NEXT: i32.shl $push38=, $12, $pop37 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop38 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.const $push58=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push41=, $29, $pop58 -; NO-SIMD128-FAST-NEXT: i32.shl $push42=, $13, $pop41 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop42 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push44=, $0, $pop43 -; NO-SIMD128-FAST-NEXT: i32.const $push57=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push45=, $30, $pop57 -; NO-SIMD128-FAST-NEXT: i32.shl $push46=, $14, $pop45 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop44), $pop46 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push48=, $0, $pop47 -; NO-SIMD128-FAST-NEXT: i32.const $push56=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push49=, $31, $pop56 -; NO-SIMD128-FAST-NEXT: i32.shl $push50=, $15, $pop49 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop48), $pop50 -; NO-SIMD128-FAST-NEXT: i32.const $push51=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push52=, $0, $pop51 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push53=, $32, $pop55 -; NO-SIMD128-FAST-NEXT: i32.shl $push54=, $16, $pop53 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop52), $pop54 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $20, $pop45 +; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $4, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $21, $pop44 +; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $5, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $22, $pop43 +; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $6, $pop11 +; 
NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $23, $pop42 +; NO-SIMD128-FAST-NEXT: i32.shl $push14=, $7, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $24, $pop41 +; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $8, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $25, $pop40 +; NO-SIMD128-FAST-NEXT: i32.shl $push18=, $9, $pop17 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $26, $pop39 +; NO-SIMD128-FAST-NEXT: i32.shl $push20=, $10, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $27, $pop38 +; NO-SIMD128-FAST-NEXT: i32.shl $push22=, $11, $pop21 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $28, $pop37 +; NO-SIMD128-FAST-NEXT: i32.shl $push24=, $12, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $29, $pop36 +; NO-SIMD128-FAST-NEXT: i32.shl $push26=, $13, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push27=, $30, $pop35 +; NO-SIMD128-FAST-NEXT: i32.shl $push28=, $14, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $31, $pop34 +; NO-SIMD128-FAST-NEXT: i32.shl $push30=, $15, $pop29 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 255 +; 
NO-SIMD128-FAST-NEXT: i32.and $push31=, $32, $pop33 +; NO-SIMD128-FAST-NEXT: i32.shl $push32=, $16, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop32 ; NO-SIMD128-FAST-NEXT: return %a = shl <16 x i8> %v, %x ret <16 x i8> %a @@ -3445,79 +2829,57 @@ define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) { ; NO-SIMD128-LABEL: shr_s_v16i8: ; NO-SIMD128: .functype shr_s_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.extend8_s $push1=, $9 +; NO-SIMD128-NEXT: i32.extend8_s $push1=, $16 ; NO-SIMD128-NEXT: i32.const $push0=, 255 -; NO-SIMD128-NEXT: i32.and $push56=, $17, $pop0 -; NO-SIMD128-NEXT: local.tee $push55=, $17=, $pop56 -; NO-SIMD128-NEXT: i32.shr_s $push2=, $pop1, $pop55 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop2 -; NO-SIMD128-NEXT: i32.extend8_s $push3=, $5 +; NO-SIMD128-NEXT: i32.and $push34=, $17, $pop0 +; NO-SIMD128-NEXT: local.tee $push33=, $17=, $pop34 +; NO-SIMD128-NEXT: i32.shr_s $push2=, $pop1, $pop33 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop2 +; NO-SIMD128-NEXT: i32.extend8_s $push3=, $15 ; NO-SIMD128-NEXT: i32.shr_s $push4=, $pop3, $17 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop4 -; NO-SIMD128-NEXT: i32.extend8_s $push5=, $3 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop4 +; NO-SIMD128-NEXT: i32.extend8_s $push5=, $14 ; NO-SIMD128-NEXT: i32.shr_s $push6=, $pop5, $17 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop6 -; NO-SIMD128-NEXT: i32.extend8_s $push7=, $2 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop6 +; NO-SIMD128-NEXT: i32.extend8_s $push7=, $13 ; NO-SIMD128-NEXT: i32.shr_s $push8=, $pop7, $17 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop8 -; NO-SIMD128-NEXT: i32.extend8_s $push9=, $1 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop8 +; NO-SIMD128-NEXT: i32.extend8_s $push9=, $12 ; NO-SIMD128-NEXT: i32.shr_s $push10=, $pop9, $17 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop10 -; NO-SIMD128-NEXT: i32.const $push13=, 15 -; NO-SIMD128-NEXT: i32.add $push14=, 
$0, $pop13 -; NO-SIMD128-NEXT: i32.extend8_s $push11=, $16 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop10 +; NO-SIMD128-NEXT: i32.extend8_s $push11=, $11 ; NO-SIMD128-NEXT: i32.shr_s $push12=, $pop11, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.const $push17=, 14 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.extend8_s $push15=, $15 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop12 +; NO-SIMD128-NEXT: i32.extend8_s $push13=, $10 +; NO-SIMD128-NEXT: i32.shr_s $push14=, $pop13, $17 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop14 +; NO-SIMD128-NEXT: i32.extend8_s $push15=, $9 ; NO-SIMD128-NEXT: i32.shr_s $push16=, $pop15, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push21=, 13 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.extend8_s $push19=, $14 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop16 +; NO-SIMD128-NEXT: i32.extend8_s $push17=, $8 +; NO-SIMD128-NEXT: i32.shr_s $push18=, $pop17, $17 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop18 +; NO-SIMD128-NEXT: i32.extend8_s $push19=, $7 ; NO-SIMD128-NEXT: i32.shr_s $push20=, $pop19, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push25=, 12 -; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 -; NO-SIMD128-NEXT: i32.extend8_s $push23=, $13 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop20 +; NO-SIMD128-NEXT: i32.extend8_s $push21=, $6 +; NO-SIMD128-NEXT: i32.shr_s $push22=, $pop21, $17 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop22 +; NO-SIMD128-NEXT: i32.extend8_s $push23=, $5 ; NO-SIMD128-NEXT: i32.shr_s $push24=, $pop23, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 -; NO-SIMD128-NEXT: i32.const $push29=, 11 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.extend8_s $push27=, $12 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop24 +; NO-SIMD128-NEXT: i32.extend8_s $push25=, $4 +; NO-SIMD128-NEXT: i32.shr_s $push26=, $pop25, $17 +; NO-SIMD128-NEXT: i32.store8 
3($0), $pop26 +; NO-SIMD128-NEXT: i32.extend8_s $push27=, $3 ; NO-SIMD128-NEXT: i32.shr_s $push28=, $pop27, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push33=, 10 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.extend8_s $push31=, $11 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop28 +; NO-SIMD128-NEXT: i32.extend8_s $push29=, $2 +; NO-SIMD128-NEXT: i32.shr_s $push30=, $pop29, $17 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop30 +; NO-SIMD128-NEXT: i32.extend8_s $push31=, $1 ; NO-SIMD128-NEXT: i32.shr_s $push32=, $pop31, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.const $push37=, 9 -; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 -; NO-SIMD128-NEXT: i32.extend8_s $push35=, $10 -; NO-SIMD128-NEXT: i32.shr_s $push36=, $pop35, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 -; NO-SIMD128-NEXT: i32.const $push41=, 7 -; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-NEXT: i32.extend8_s $push39=, $8 -; NO-SIMD128-NEXT: i32.shr_s $push40=, $pop39, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-NEXT: i32.const $push45=, 6 -; NO-SIMD128-NEXT: i32.add $push46=, $0, $pop45 -; NO-SIMD128-NEXT: i32.extend8_s $push43=, $7 -; NO-SIMD128-NEXT: i32.shr_s $push44=, $pop43, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop46), $pop44 -; NO-SIMD128-NEXT: i32.const $push49=, 5 -; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-NEXT: i32.extend8_s $push47=, $6 -; NO-SIMD128-NEXT: i32.shr_s $push48=, $pop47, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-NEXT: i32.const $push53=, 3 -; NO-SIMD128-NEXT: i32.add $push54=, $0, $pop53 -; NO-SIMD128-NEXT: i32.extend8_s $push51=, $4 -; NO-SIMD128-NEXT: i32.shr_s $push52=, $pop51, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop54), $pop52 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop32 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_s_v16i8: @@ -3525,9 +2887,9 @@ define <16 x i8> @shr_s_v16i8(<16 
x i8> %v, i8 %x) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push1=, $1 ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push56=, $17, $pop0 -; NO-SIMD128-FAST-NEXT: local.tee $push55=, $1=, $pop56 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $pop1, $pop55 +; NO-SIMD128-FAST-NEXT: i32.and $push34=, $17, $pop0 +; NO-SIMD128-FAST-NEXT: local.tee $push33=, $1=, $pop34 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $pop1, $pop33 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push3=, $2 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push4=, $pop3, $1 @@ -3535,67 +2897,45 @@ define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) { ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push5=, $3 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push6=, $pop5, $1 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push9=, $4 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push7=, $4 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push8=, $pop7, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push9=, $5 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push10=, $pop9, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop10 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push11=, $5 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push11=, $6 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push12=, $pop11, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $6 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push14=, $pop13, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $8 
; NO-SIMD128-FAST-NEXT: i32.shr_s $push16=, $pop15, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $7 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push17=, $9 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push18=, $pop17, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $10 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push20=, $pop19, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push23=, $8 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push21=, $11 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push22=, $pop21, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop22 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push23=, $12 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push24=, $pop23, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop24 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $9 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $13 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push26=, $pop25, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $10 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop26 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push27=, $14 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push28=, $pop27, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $15 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push30=, $pop29, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop30 -; NO-SIMD128-FAST-NEXT: i32.const $push31=, 10 -; 
NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push33=, $11 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push34=, $pop33, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop32), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push37=, $12 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push38=, $pop37, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop38 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push41=, $13 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push42=, $pop41, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop42 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push44=, $0, $pop43 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push45=, $14 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push46=, $pop45, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop44), $pop46 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push48=, $0, $pop47 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push49=, $15 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push50=, $pop49, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop48), $pop50 -; NO-SIMD128-FAST-NEXT: i32.const $push51=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push52=, $0, $pop51 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push53=, $16 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push54=, $pop53, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop52), $pop54 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop30 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push31=, $16 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push32=, $pop31, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop32 ; NO-SIMD128-FAST-NEXT: return %t = insertelement <16 x i8> undef, i8 %x, i32 0 %s = shufflevector <16 x i8> %t, <16 x i8> undef, @@ -3811,108 +3151,86 @@ define <16 x i8> @shr_s_vec_v16i8(<16 x i8> %v, <16 x i8> %x) { ; NO-SIMD128-LABEL: 
shr_s_vec_v16i8: ; NO-SIMD128: .functype shr_s_vec_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.extend8_s $push2=, $9 +; NO-SIMD128-NEXT: i32.extend8_s $push2=, $16 ; NO-SIMD128-NEXT: i32.const $push0=, 255 -; NO-SIMD128-NEXT: i32.and $push1=, $25, $pop0 +; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop0 ; NO-SIMD128-NEXT: i32.shr_s $push3=, $pop2, $pop1 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop3 -; NO-SIMD128-NEXT: i32.extend8_s $push5=, $5 -; NO-SIMD128-NEXT: i32.const $push85=, 255 -; NO-SIMD128-NEXT: i32.and $push4=, $21, $pop85 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop3 +; NO-SIMD128-NEXT: i32.extend8_s $push5=, $15 +; NO-SIMD128-NEXT: i32.const $push63=, 255 +; NO-SIMD128-NEXT: i32.and $push4=, $31, $pop63 ; NO-SIMD128-NEXT: i32.shr_s $push6=, $pop5, $pop4 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop6 -; NO-SIMD128-NEXT: i32.extend8_s $push8=, $3 -; NO-SIMD128-NEXT: i32.const $push84=, 255 -; NO-SIMD128-NEXT: i32.and $push7=, $19, $pop84 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop6 +; NO-SIMD128-NEXT: i32.extend8_s $push8=, $14 +; NO-SIMD128-NEXT: i32.const $push62=, 255 +; NO-SIMD128-NEXT: i32.and $push7=, $30, $pop62 ; NO-SIMD128-NEXT: i32.shr_s $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop9 -; NO-SIMD128-NEXT: i32.extend8_s $push11=, $2 -; NO-SIMD128-NEXT: i32.const $push83=, 255 -; NO-SIMD128-NEXT: i32.and $push10=, $18, $pop83 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop9 +; NO-SIMD128-NEXT: i32.extend8_s $push11=, $13 +; NO-SIMD128-NEXT: i32.const $push61=, 255 +; NO-SIMD128-NEXT: i32.and $push10=, $29, $pop61 ; NO-SIMD128-NEXT: i32.shr_s $push12=, $pop11, $pop10 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop12 -; NO-SIMD128-NEXT: i32.extend8_s $push14=, $1 -; NO-SIMD128-NEXT: i32.const $push82=, 255 -; NO-SIMD128-NEXT: i32.and $push13=, $17, $pop82 +; 
NO-SIMD128-NEXT: i32.store8 12($0), $pop12 +; NO-SIMD128-NEXT: i32.extend8_s $push14=, $12 +; NO-SIMD128-NEXT: i32.const $push60=, 255 +; NO-SIMD128-NEXT: i32.and $push13=, $28, $pop60 ; NO-SIMD128-NEXT: i32.shr_s $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop15 -; NO-SIMD128-NEXT: i32.const $push19=, 15 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.extend8_s $push17=, $16 -; NO-SIMD128-NEXT: i32.const $push81=, 255 -; NO-SIMD128-NEXT: i32.and $push16=, $32, $pop81 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop15 +; NO-SIMD128-NEXT: i32.extend8_s $push17=, $11 +; NO-SIMD128-NEXT: i32.const $push59=, 255 +; NO-SIMD128-NEXT: i32.and $push16=, $27, $pop59 ; NO-SIMD128-NEXT: i32.shr_s $push18=, $pop17, $pop16 -; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push24=, 14 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.extend8_s $push22=, $15 -; NO-SIMD128-NEXT: i32.const $push80=, 255 -; NO-SIMD128-NEXT: i32.and $push21=, $31, $pop80 -; NO-SIMD128-NEXT: i32.shr_s $push23=, $pop22, $pop21 -; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 -; NO-SIMD128-NEXT: i32.const $push29=, 13 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.extend8_s $push27=, $14 -; NO-SIMD128-NEXT: i32.const $push79=, 255 -; NO-SIMD128-NEXT: i32.and $push26=, $30, $pop79 -; NO-SIMD128-NEXT: i32.shr_s $push28=, $pop27, $pop26 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push34=, 12 -; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 -; NO-SIMD128-NEXT: i32.extend8_s $push32=, $13 -; NO-SIMD128-NEXT: i32.const $push78=, 255 -; NO-SIMD128-NEXT: i32.and $push31=, $29, $pop78 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop18 +; NO-SIMD128-NEXT: i32.extend8_s $push20=, $10 +; NO-SIMD128-NEXT: i32.const $push58=, 255 +; NO-SIMD128-NEXT: i32.and $push19=, $26, $pop58 +; NO-SIMD128-NEXT: i32.shr_s $push21=, $pop20, $pop19 +; NO-SIMD128-NEXT: i32.store8 9($0), 
$pop21 +; NO-SIMD128-NEXT: i32.extend8_s $push23=, $9 +; NO-SIMD128-NEXT: i32.const $push57=, 255 +; NO-SIMD128-NEXT: i32.and $push22=, $25, $pop57 +; NO-SIMD128-NEXT: i32.shr_s $push24=, $pop23, $pop22 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop24 +; NO-SIMD128-NEXT: i32.extend8_s $push26=, $8 +; NO-SIMD128-NEXT: i32.const $push56=, 255 +; NO-SIMD128-NEXT: i32.and $push25=, $24, $pop56 +; NO-SIMD128-NEXT: i32.shr_s $push27=, $pop26, $pop25 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop27 +; NO-SIMD128-NEXT: i32.extend8_s $push29=, $7 +; NO-SIMD128-NEXT: i32.const $push55=, 255 +; NO-SIMD128-NEXT: i32.and $push28=, $23, $pop55 +; NO-SIMD128-NEXT: i32.shr_s $push30=, $pop29, $pop28 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop30 +; NO-SIMD128-NEXT: i32.extend8_s $push32=, $6 +; NO-SIMD128-NEXT: i32.const $push54=, 255 +; NO-SIMD128-NEXT: i32.and $push31=, $22, $pop54 ; NO-SIMD128-NEXT: i32.shr_s $push33=, $pop32, $pop31 -; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 -; NO-SIMD128-NEXT: i32.const $push39=, 11 -; NO-SIMD128-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-NEXT: i32.extend8_s $push37=, $12 -; NO-SIMD128-NEXT: i32.const $push77=, 255 -; NO-SIMD128-NEXT: i32.and $push36=, $28, $pop77 -; NO-SIMD128-NEXT: i32.shr_s $push38=, $pop37, $pop36 -; NO-SIMD128-NEXT: i32.store8 0($pop40), $pop38 -; NO-SIMD128-NEXT: i32.const $push44=, 10 -; NO-SIMD128-NEXT: i32.add $push45=, $0, $pop44 -; NO-SIMD128-NEXT: i32.extend8_s $push42=, $11 -; NO-SIMD128-NEXT: i32.const $push76=, 255 -; NO-SIMD128-NEXT: i32.and $push41=, $27, $pop76 -; NO-SIMD128-NEXT: i32.shr_s $push43=, $pop42, $pop41 -; NO-SIMD128-NEXT: i32.store8 0($pop45), $pop43 -; NO-SIMD128-NEXT: i32.const $push49=, 9 -; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-NEXT: i32.extend8_s $push47=, $10 -; NO-SIMD128-NEXT: i32.const $push75=, 255 -; NO-SIMD128-NEXT: i32.and $push46=, $26, $pop75 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop33 +; NO-SIMD128-NEXT: i32.extend8_s $push35=, $5 +; NO-SIMD128-NEXT: 
i32.const $push53=, 255 +; NO-SIMD128-NEXT: i32.and $push34=, $21, $pop53 +; NO-SIMD128-NEXT: i32.shr_s $push36=, $pop35, $pop34 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop36 +; NO-SIMD128-NEXT: i32.extend8_s $push38=, $4 +; NO-SIMD128-NEXT: i32.const $push52=, 255 +; NO-SIMD128-NEXT: i32.and $push37=, $20, $pop52 +; NO-SIMD128-NEXT: i32.shr_s $push39=, $pop38, $pop37 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop39 +; NO-SIMD128-NEXT: i32.extend8_s $push41=, $3 +; NO-SIMD128-NEXT: i32.const $push51=, 255 +; NO-SIMD128-NEXT: i32.and $push40=, $19, $pop51 +; NO-SIMD128-NEXT: i32.shr_s $push42=, $pop41, $pop40 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop42 +; NO-SIMD128-NEXT: i32.extend8_s $push44=, $2 +; NO-SIMD128-NEXT: i32.const $push50=, 255 +; NO-SIMD128-NEXT: i32.and $push43=, $18, $pop50 +; NO-SIMD128-NEXT: i32.shr_s $push45=, $pop44, $pop43 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop45 +; NO-SIMD128-NEXT: i32.extend8_s $push47=, $1 +; NO-SIMD128-NEXT: i32.const $push49=, 255 +; NO-SIMD128-NEXT: i32.and $push46=, $17, $pop49 ; NO-SIMD128-NEXT: i32.shr_s $push48=, $pop47, $pop46 -; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-NEXT: i32.const $push54=, 7 -; NO-SIMD128-NEXT: i32.add $push55=, $0, $pop54 -; NO-SIMD128-NEXT: i32.extend8_s $push52=, $8 -; NO-SIMD128-NEXT: i32.const $push74=, 255 -; NO-SIMD128-NEXT: i32.and $push51=, $24, $pop74 -; NO-SIMD128-NEXT: i32.shr_s $push53=, $pop52, $pop51 -; NO-SIMD128-NEXT: i32.store8 0($pop55), $pop53 -; NO-SIMD128-NEXT: i32.const $push59=, 6 -; NO-SIMD128-NEXT: i32.add $push60=, $0, $pop59 -; NO-SIMD128-NEXT: i32.extend8_s $push57=, $7 -; NO-SIMD128-NEXT: i32.const $push73=, 255 -; NO-SIMD128-NEXT: i32.and $push56=, $23, $pop73 -; NO-SIMD128-NEXT: i32.shr_s $push58=, $pop57, $pop56 -; NO-SIMD128-NEXT: i32.store8 0($pop60), $pop58 -; NO-SIMD128-NEXT: i32.const $push64=, 5 -; NO-SIMD128-NEXT: i32.add $push65=, $0, $pop64 -; NO-SIMD128-NEXT: i32.extend8_s $push62=, $6 -; NO-SIMD128-NEXT: i32.const $push72=, 255 -; 
NO-SIMD128-NEXT: i32.and $push61=, $22, $pop72 -; NO-SIMD128-NEXT: i32.shr_s $push63=, $pop62, $pop61 -; NO-SIMD128-NEXT: i32.store8 0($pop65), $pop63 -; NO-SIMD128-NEXT: i32.const $push69=, 3 -; NO-SIMD128-NEXT: i32.add $push70=, $0, $pop69 -; NO-SIMD128-NEXT: i32.extend8_s $push67=, $4 -; NO-SIMD128-NEXT: i32.const $push71=, 255 -; NO-SIMD128-NEXT: i32.and $push66=, $20, $pop71 -; NO-SIMD128-NEXT: i32.shr_s $push68=, $pop67, $pop66 -; NO-SIMD128-NEXT: i32.store8 0($pop70), $pop68 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop48 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_s_vec_v16i8: @@ -3924,102 +3242,80 @@ define <16 x i8> @shr_s_vec_v16i8(<16 x i8> %v, <16 x i8> %x) { ; NO-SIMD128-FAST-NEXT: i32.shr_s $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop3 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push5=, $2 -; NO-SIMD128-FAST-NEXT: i32.const $push85=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, $pop85 +; NO-SIMD128-FAST-NEXT: i32.const $push63=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, $pop63 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push6=, $pop5, $pop4 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push8=, $3 -; NO-SIMD128-FAST-NEXT: i32.const $push84=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop84 +; NO-SIMD128-FAST-NEXT: i32.const $push62=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop62 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push9=, $pop8, $pop7 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $4 -; NO-SIMD128-FAST-NEXT: i32.const $push83=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $20, $pop83 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push14=, $pop13, $pop12 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop14 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push16=, $5 -; NO-SIMD128-FAST-NEXT: i32.const $push82=, 255 -; 
NO-SIMD128-FAST-NEXT: i32.and $push15=, $21, $pop82 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push17=, $pop16, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push18=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push21=, $6 -; NO-SIMD128-FAST-NEXT: i32.const $push81=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $22, $pop81 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push22=, $pop21, $pop20 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop19), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push26=, $7 -; NO-SIMD128-FAST-NEXT: i32.const $push80=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $23, $pop80 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push11=, $4 +; NO-SIMD128-FAST-NEXT: i32.const $push61=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $20, $pop61 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push12=, $pop11, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push14=, $5 +; NO-SIMD128-FAST-NEXT: i32.const $push60=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $21, $pop60 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push15=, $pop14, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push17=, $6 +; NO-SIMD128-FAST-NEXT: i32.const $push59=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $22, $pop59 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push18=, $pop17, $pop16 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push20=, $7 +; NO-SIMD128-FAST-NEXT: i32.const $push58=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $23, $pop58 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push21=, $pop20, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push23=, $8 +; NO-SIMD128-FAST-NEXT: i32.const $push57=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, 
$24, $pop57 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push24=, $pop23, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push26=, $9 +; NO-SIMD128-FAST-NEXT: i32.const $push56=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $25, $pop56 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push27=, $pop26, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop27 -; NO-SIMD128-FAST-NEXT: i32.const $push28=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push29=, $0, $pop28 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push31=, $8 -; NO-SIMD128-FAST-NEXT: i32.const $push79=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push30=, $24, $pop79 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push32=, $pop31, $pop30 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop29), $pop32 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push34=, $9 -; NO-SIMD128-FAST-NEXT: i32.const $push78=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push33=, $25, $pop78 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push35=, $pop34, $pop33 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push36=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push39=, $10 -; NO-SIMD128-FAST-NEXT: i32.const $push77=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push38=, $26, $pop77 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push40=, $pop39, $pop38 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop40 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push44=, $11 -; NO-SIMD128-FAST-NEXT: i32.const $push76=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push43=, $27, $pop76 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop27 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $10 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $26, $pop55 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push30=, $pop29, $pop28 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop30 +; 
NO-SIMD128-FAST-NEXT: i32.extend8_s $push32=, $11 +; NO-SIMD128-FAST-NEXT: i32.const $push54=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $27, $pop54 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push33=, $pop32, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop33 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push35=, $12 +; NO-SIMD128-FAST-NEXT: i32.const $push53=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push34=, $28, $pop53 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push36=, $pop35, $pop34 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop36 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push38=, $13 +; NO-SIMD128-FAST-NEXT: i32.const $push52=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $29, $pop52 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push39=, $pop38, $pop37 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop39 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push41=, $14 +; NO-SIMD128-FAST-NEXT: i32.const $push51=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push40=, $30, $pop51 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push42=, $pop41, $pop40 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop42 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push44=, $15 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push43=, $31, $pop50 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push45=, $pop44, $pop43 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop42), $pop45 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push47=, $0, $pop46 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push49=, $12 -; NO-SIMD128-FAST-NEXT: i32.const $push75=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push48=, $28, $pop75 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push50=, $pop49, $pop48 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop47), $pop50 -; NO-SIMD128-FAST-NEXT: i32.const $push51=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push52=, $0, $pop51 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push54=, $13 -; NO-SIMD128-FAST-NEXT: i32.const $push74=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push53=, $29, $pop74 -; NO-SIMD128-FAST-NEXT: 
i32.shr_s $push55=, $pop54, $pop53 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop52), $pop55 -; NO-SIMD128-FAST-NEXT: i32.const $push56=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push57=, $0, $pop56 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push59=, $14 -; NO-SIMD128-FAST-NEXT: i32.const $push73=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push58=, $30, $pop73 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push60=, $pop59, $pop58 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop57), $pop60 -; NO-SIMD128-FAST-NEXT: i32.const $push61=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push62=, $0, $pop61 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push64=, $15 -; NO-SIMD128-FAST-NEXT: i32.const $push72=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push63=, $31, $pop72 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push65=, $pop64, $pop63 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop62), $pop65 -; NO-SIMD128-FAST-NEXT: i32.const $push66=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push67=, $0, $pop66 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push69=, $16 -; NO-SIMD128-FAST-NEXT: i32.const $push71=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push68=, $32, $pop71 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push70=, $pop69, $pop68 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop67), $pop70 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop45 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push47=, $16 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push46=, $32, $pop49 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push48=, $pop47, $pop46 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop48 ; NO-SIMD128-FAST-NEXT: return %a = ashr <16 x i8> %v, %x ret <16 x i8> %a @@ -4042,94 +3338,72 @@ define <16 x i8> @shr_u_v16i8(<16 x i8> %v, i8 %x) { ; NO-SIMD128: .functype shr_u_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 255 -; NO-SIMD128-NEXT: i32.and $push1=, $9, $pop0 -; NO-SIMD128-NEXT: i32.const $push72=, 255 -; NO-SIMD128-NEXT: 
i32.and $push71=, $17, $pop72 -; NO-SIMD128-NEXT: local.tee $push70=, $17=, $pop71 -; NO-SIMD128-NEXT: i32.shr_u $push2=, $pop1, $pop70 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push69=, 255 -; NO-SIMD128-NEXT: i32.and $push3=, $5, $pop69 +; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop0 +; NO-SIMD128-NEXT: i32.const $push50=, 255 +; NO-SIMD128-NEXT: i32.and $push49=, $17, $pop50 +; NO-SIMD128-NEXT: local.tee $push48=, $17=, $pop49 +; NO-SIMD128-NEXT: i32.shr_u $push2=, $pop1, $pop48 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push47=, 255 +; NO-SIMD128-NEXT: i32.and $push3=, $15, $pop47 ; NO-SIMD128-NEXT: i32.shr_u $push4=, $pop3, $17 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push68=, 255 -; NO-SIMD128-NEXT: i32.and $push5=, $3, $pop68 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push46=, 255 +; NO-SIMD128-NEXT: i32.and $push5=, $14, $pop46 ; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $17 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push67=, 255 -; NO-SIMD128-NEXT: i32.and $push7=, $2, $pop67 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push45=, 255 +; NO-SIMD128-NEXT: i32.and $push7=, $13, $pop45 ; NO-SIMD128-NEXT: i32.shr_u $push8=, $pop7, $17 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop8 -; NO-SIMD128-NEXT: i32.const $push66=, 255 -; NO-SIMD128-NEXT: i32.and $push9=, $1, $pop66 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push44=, 255 +; NO-SIMD128-NEXT: i32.and $push9=, $12, $pop44 ; NO-SIMD128-NEXT: i32.shr_u $push10=, $pop9, $17 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop10 -; NO-SIMD128-NEXT: i32.const $push13=, 15 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.const $push65=, 255 -; NO-SIMD128-NEXT: i32.and $push11=, $16, $pop65 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push43=, 255 
+; NO-SIMD128-NEXT: i32.and $push11=, $11, $pop43 ; NO-SIMD128-NEXT: i32.shr_u $push12=, $pop11, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.const $push17=, 14 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.const $push64=, 255 -; NO-SIMD128-NEXT: i32.and $push15=, $15, $pop64 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push42=, 255 +; NO-SIMD128-NEXT: i32.and $push13=, $10, $pop42 +; NO-SIMD128-NEXT: i32.shr_u $push14=, $pop13, $17 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop14 +; NO-SIMD128-NEXT: i32.const $push41=, 255 +; NO-SIMD128-NEXT: i32.and $push15=, $9, $pop41 ; NO-SIMD128-NEXT: i32.shr_u $push16=, $pop15, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push21=, 13 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.const $push63=, 255 -; NO-SIMD128-NEXT: i32.and $push19=, $14, $pop63 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop16 +; NO-SIMD128-NEXT: i32.const $push40=, 255 +; NO-SIMD128-NEXT: i32.and $push17=, $8, $pop40 +; NO-SIMD128-NEXT: i32.shr_u $push18=, $pop17, $17 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop18 +; NO-SIMD128-NEXT: i32.const $push39=, 255 +; NO-SIMD128-NEXT: i32.and $push19=, $7, $pop39 ; NO-SIMD128-NEXT: i32.shr_u $push20=, $pop19, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push25=, 12 -; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 -; NO-SIMD128-NEXT: i32.const $push62=, 255 -; NO-SIMD128-NEXT: i32.and $push23=, $13, $pop62 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop20 +; NO-SIMD128-NEXT: i32.const $push38=, 255 +; NO-SIMD128-NEXT: i32.and $push21=, $6, $pop38 +; NO-SIMD128-NEXT: i32.shr_u $push22=, $pop21, $17 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop22 +; NO-SIMD128-NEXT: i32.const $push37=, 255 +; NO-SIMD128-NEXT: i32.and $push23=, $5, $pop37 ; NO-SIMD128-NEXT: i32.shr_u $push24=, $pop23, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 -; 
NO-SIMD128-NEXT: i32.const $push29=, 11 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.const $push61=, 255 -; NO-SIMD128-NEXT: i32.and $push27=, $12, $pop61 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop24 +; NO-SIMD128-NEXT: i32.const $push36=, 255 +; NO-SIMD128-NEXT: i32.and $push25=, $4, $pop36 +; NO-SIMD128-NEXT: i32.shr_u $push26=, $pop25, $17 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop26 +; NO-SIMD128-NEXT: i32.const $push35=, 255 +; NO-SIMD128-NEXT: i32.and $push27=, $3, $pop35 ; NO-SIMD128-NEXT: i32.shr_u $push28=, $pop27, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push33=, 10 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.const $push60=, 255 -; NO-SIMD128-NEXT: i32.and $push31=, $11, $pop60 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop28 +; NO-SIMD128-NEXT: i32.const $push34=, 255 +; NO-SIMD128-NEXT: i32.and $push29=, $2, $pop34 +; NO-SIMD128-NEXT: i32.shr_u $push30=, $pop29, $17 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop30 +; NO-SIMD128-NEXT: i32.const $push33=, 255 +; NO-SIMD128-NEXT: i32.and $push31=, $1, $pop33 ; NO-SIMD128-NEXT: i32.shr_u $push32=, $pop31, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.const $push37=, 9 -; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 -; NO-SIMD128-NEXT: i32.const $push59=, 255 -; NO-SIMD128-NEXT: i32.and $push35=, $10, $pop59 -; NO-SIMD128-NEXT: i32.shr_u $push36=, $pop35, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 -; NO-SIMD128-NEXT: i32.const $push41=, 7 -; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-NEXT: i32.const $push58=, 255 -; NO-SIMD128-NEXT: i32.and $push39=, $8, $pop58 -; NO-SIMD128-NEXT: i32.shr_u $push40=, $pop39, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-NEXT: i32.const $push45=, 6 -; NO-SIMD128-NEXT: i32.add $push46=, $0, $pop45 -; NO-SIMD128-NEXT: i32.const $push57=, 255 -; NO-SIMD128-NEXT: i32.and $push43=, $7, $pop57 -; NO-SIMD128-NEXT: 
i32.shr_u $push44=, $pop43, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop46), $pop44 -; NO-SIMD128-NEXT: i32.const $push49=, 5 -; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-NEXT: i32.const $push56=, 255 -; NO-SIMD128-NEXT: i32.and $push47=, $6, $pop56 -; NO-SIMD128-NEXT: i32.shr_u $push48=, $pop47, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-NEXT: i32.const $push53=, 3 -; NO-SIMD128-NEXT: i32.add $push54=, $0, $pop53 -; NO-SIMD128-NEXT: i32.const $push55=, 255 -; NO-SIMD128-NEXT: i32.and $push51=, $4, $pop55 -; NO-SIMD128-NEXT: i32.shr_u $push52=, $pop51, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop54), $pop52 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop32 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_u_v16i8: @@ -4137,93 +3411,71 @@ define <16 x i8> @shr_u_v16i8(<16 x i8> %v, i8 %x) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 ; NO-SIMD128-FAST-NEXT: i32.and $push1=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push72=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push71=, $17, $pop72 -; NO-SIMD128-FAST-NEXT: local.tee $push70=, $1=, $pop71 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $pop1, $pop70 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push49=, $17, $pop50 +; NO-SIMD128-FAST-NEXT: local.tee $push48=, $1=, $pop49 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $pop1, $pop48 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push69=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push3=, $2, $pop69 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $2, $pop47 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push4=, $pop3, $1 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push68=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $3, $pop68 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $3, $pop46 ; NO-SIMD128-FAST-NEXT: i32.shr_u 
$push6=, $pop5, $1 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push67=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push7=, $4, $pop67 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $4, $pop45 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push8=, $pop7, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop10), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push66=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push11=, $5, $pop66 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $5, $pop44 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push10=, $pop9, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $6, $pop43 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push15=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-FAST-NEXT: i32.const $push65=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push13=, $6, $pop65 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $7, $pop42 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push14=, $pop13, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop16), $pop14 -; NO-SIMD128-FAST-NEXT: i32.const $push19=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-FAST-NEXT: i32.const $push64=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push17=, $7, $pop64 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $8, $pop41 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push16=, $pop15, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop16 +; NO-SIMD128-FAST-NEXT: 
i32.const $push40=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $9, $pop40 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push18=, $pop17, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop20), $pop18 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-FAST-NEXT: i32.const $push63=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push21=, $8, $pop63 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $10, $pop39 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push20=, $pop19, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $11, $pop38 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push22=, $pop21, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push62=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $9, $pop62 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $12, $pop37 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push24=, $pop23, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $13, $pop36 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push26=, $pop25, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-FAST-NEXT: i32.const $push61=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push27=, $10, $pop61 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push27=, $14, $pop35 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push28=, $pop27, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, 
$0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.const $push60=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push31=, $11, $pop60 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $15, $pop34 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push30=, $pop29, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $16, $pop33 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push32=, $pop31, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push37=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push38=, $0, $pop37 -; NO-SIMD128-FAST-NEXT: i32.const $push59=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push35=, $12, $pop59 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push36=, $pop35, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop38), $pop36 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-FAST-NEXT: i32.const $push58=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push39=, $13, $pop58 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push40=, $pop39, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push46=, $0, $pop45 -; NO-SIMD128-FAST-NEXT: i32.const $push57=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push43=, $14, $pop57 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push44=, $pop43, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop46), $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-FAST-NEXT: i32.const $push56=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push47=, $15, $pop56 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push48=, $pop47, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-FAST-NEXT: i32.const $push53=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push54=, $0, $pop53 -; NO-SIMD128-FAST-NEXT: i32.const 
$push55=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push51=, $16, $pop55 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push52=, $pop51, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop54), $pop52 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop32 ; NO-SIMD128-FAST-NEXT: return %t = insertelement <16 x i8> undef, i8 %x, i32 0 %s = shufflevector <16 x i8> %t, <16 x i8> undef, @@ -4440,123 +3692,101 @@ define <16 x i8> @shr_u_vec_v16i8(<16 x i8> %v, <16 x i8> %x) { ; NO-SIMD128: .functype shr_u_vec_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 255 -; NO-SIMD128-NEXT: i32.and $push2=, $9, $pop0 -; NO-SIMD128-NEXT: i32.const $push101=, 255 -; NO-SIMD128-NEXT: i32.and $push1=, $25, $pop101 -; NO-SIMD128-NEXT: i32.shr_u $push3=, $pop2, $pop1 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push100=, 255 -; NO-SIMD128-NEXT: i32.and $push5=, $5, $pop100 -; NO-SIMD128-NEXT: i32.const $push99=, 255 -; NO-SIMD128-NEXT: i32.and $push4=, $21, $pop99 -; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $pop4 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push98=, 255 -; NO-SIMD128-NEXT: i32.and $push8=, $3, $pop98 -; NO-SIMD128-NEXT: i32.const $push97=, 255 -; NO-SIMD128-NEXT: i32.and $push7=, $19, $pop97 -; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop9 -; NO-SIMD128-NEXT: i32.const $push96=, 255 -; NO-SIMD128-NEXT: i32.and $push11=, $2, $pop96 -; NO-SIMD128-NEXT: i32.const $push95=, 255 -; NO-SIMD128-NEXT: i32.and $push10=, $18, $pop95 -; NO-SIMD128-NEXT: i32.shr_u $push12=, $pop11, $pop10 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop12 -; NO-SIMD128-NEXT: i32.const $push94=, 255 -; NO-SIMD128-NEXT: i32.and $push14=, $1, $pop94 -; NO-SIMD128-NEXT: i32.const $push93=, 255 -; NO-SIMD128-NEXT: i32.and $push13=, 
$17, $pop93 -; NO-SIMD128-NEXT: i32.shr_u $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop15 -; NO-SIMD128-NEXT: i32.const $push19=, 15 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.const $push92=, 255 -; NO-SIMD128-NEXT: i32.and $push17=, $16, $pop92 -; NO-SIMD128-NEXT: i32.const $push91=, 255 -; NO-SIMD128-NEXT: i32.and $push16=, $32, $pop91 -; NO-SIMD128-NEXT: i32.shr_u $push18=, $pop17, $pop16 -; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push24=, 14 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.const $push90=, 255 -; NO-SIMD128-NEXT: i32.and $push22=, $15, $pop90 -; NO-SIMD128-NEXT: i32.const $push89=, 255 -; NO-SIMD128-NEXT: i32.and $push21=, $31, $pop89 -; NO-SIMD128-NEXT: i32.shr_u $push23=, $pop22, $pop21 -; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 -; NO-SIMD128-NEXT: i32.const $push29=, 13 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.const $push88=, 255 -; NO-SIMD128-NEXT: i32.and $push27=, $14, $pop88 -; NO-SIMD128-NEXT: i32.const $push87=, 255 -; NO-SIMD128-NEXT: i32.and $push26=, $30, $pop87 -; NO-SIMD128-NEXT: i32.shr_u $push28=, $pop27, $pop26 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push34=, 12 -; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 -; NO-SIMD128-NEXT: i32.const $push86=, 255 -; NO-SIMD128-NEXT: i32.and $push32=, $13, $pop86 -; NO-SIMD128-NEXT: i32.const $push85=, 255 -; NO-SIMD128-NEXT: i32.and $push31=, $29, $pop85 -; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop31 -; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 -; NO-SIMD128-NEXT: i32.const $push39=, 11 -; NO-SIMD128-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-NEXT: i32.const $push84=, 255 -; NO-SIMD128-NEXT: i32.and $push37=, $12, $pop84 -; NO-SIMD128-NEXT: i32.const $push83=, 255 -; NO-SIMD128-NEXT: i32.and $push36=, $28, $pop83 -; NO-SIMD128-NEXT: i32.shr_u $push38=, $pop37, $pop36 -; 
NO-SIMD128-NEXT: i32.store8 0($pop40), $pop38 -; NO-SIMD128-NEXT: i32.const $push44=, 10 -; NO-SIMD128-NEXT: i32.add $push45=, $0, $pop44 -; NO-SIMD128-NEXT: i32.const $push82=, 255 -; NO-SIMD128-NEXT: i32.and $push42=, $11, $pop82 -; NO-SIMD128-NEXT: i32.const $push81=, 255 -; NO-SIMD128-NEXT: i32.and $push41=, $27, $pop81 -; NO-SIMD128-NEXT: i32.shr_u $push43=, $pop42, $pop41 -; NO-SIMD128-NEXT: i32.store8 0($pop45), $pop43 -; NO-SIMD128-NEXT: i32.const $push49=, 9 -; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-NEXT: i32.const $push80=, 255 -; NO-SIMD128-NEXT: i32.and $push47=, $10, $pop80 +; NO-SIMD128-NEXT: i32.and $push2=, $16, $pop0 ; NO-SIMD128-NEXT: i32.const $push79=, 255 -; NO-SIMD128-NEXT: i32.and $push46=, $26, $pop79 -; NO-SIMD128-NEXT: i32.shr_u $push48=, $pop47, $pop46 -; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-NEXT: i32.const $push54=, 7 -; NO-SIMD128-NEXT: i32.add $push55=, $0, $pop54 +; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop79 +; NO-SIMD128-NEXT: i32.shr_u $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop3 ; NO-SIMD128-NEXT: i32.const $push78=, 255 -; NO-SIMD128-NEXT: i32.and $push52=, $8, $pop78 +; NO-SIMD128-NEXT: i32.and $push5=, $15, $pop78 ; NO-SIMD128-NEXT: i32.const $push77=, 255 -; NO-SIMD128-NEXT: i32.and $push51=, $24, $pop77 -; NO-SIMD128-NEXT: i32.shr_u $push53=, $pop52, $pop51 -; NO-SIMD128-NEXT: i32.store8 0($pop55), $pop53 -; NO-SIMD128-NEXT: i32.const $push59=, 6 -; NO-SIMD128-NEXT: i32.add $push60=, $0, $pop59 +; NO-SIMD128-NEXT: i32.and $push4=, $31, $pop77 +; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop6 ; NO-SIMD128-NEXT: i32.const $push76=, 255 -; NO-SIMD128-NEXT: i32.and $push57=, $7, $pop76 +; NO-SIMD128-NEXT: i32.and $push8=, $14, $pop76 ; NO-SIMD128-NEXT: i32.const $push75=, 255 -; NO-SIMD128-NEXT: i32.and $push56=, $23, $pop75 -; NO-SIMD128-NEXT: i32.shr_u $push58=, $pop57, $pop56 -; NO-SIMD128-NEXT: i32.store8 
0($pop60), $pop58 -; NO-SIMD128-NEXT: i32.const $push64=, 5 -; NO-SIMD128-NEXT: i32.add $push65=, $0, $pop64 +; NO-SIMD128-NEXT: i32.and $push7=, $30, $pop75 +; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop9 ; NO-SIMD128-NEXT: i32.const $push74=, 255 -; NO-SIMD128-NEXT: i32.and $push62=, $6, $pop74 +; NO-SIMD128-NEXT: i32.and $push11=, $13, $pop74 ; NO-SIMD128-NEXT: i32.const $push73=, 255 -; NO-SIMD128-NEXT: i32.and $push61=, $22, $pop73 -; NO-SIMD128-NEXT: i32.shr_u $push63=, $pop62, $pop61 -; NO-SIMD128-NEXT: i32.store8 0($pop65), $pop63 -; NO-SIMD128-NEXT: i32.const $push69=, 3 -; NO-SIMD128-NEXT: i32.add $push70=, $0, $pop69 +; NO-SIMD128-NEXT: i32.and $push10=, $29, $pop73 +; NO-SIMD128-NEXT: i32.shr_u $push12=, $pop11, $pop10 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop12 ; NO-SIMD128-NEXT: i32.const $push72=, 255 -; NO-SIMD128-NEXT: i32.and $push67=, $4, $pop72 +; NO-SIMD128-NEXT: i32.and $push14=, $12, $pop72 ; NO-SIMD128-NEXT: i32.const $push71=, 255 -; NO-SIMD128-NEXT: i32.and $push66=, $20, $pop71 -; NO-SIMD128-NEXT: i32.shr_u $push68=, $pop67, $pop66 -; NO-SIMD128-NEXT: i32.store8 0($pop70), $pop68 +; NO-SIMD128-NEXT: i32.and $push13=, $28, $pop71 +; NO-SIMD128-NEXT: i32.shr_u $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop15 +; NO-SIMD128-NEXT: i32.const $push70=, 255 +; NO-SIMD128-NEXT: i32.and $push17=, $11, $pop70 +; NO-SIMD128-NEXT: i32.const $push69=, 255 +; NO-SIMD128-NEXT: i32.and $push16=, $27, $pop69 +; NO-SIMD128-NEXT: i32.shr_u $push18=, $pop17, $pop16 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop18 +; NO-SIMD128-NEXT: i32.const $push68=, 255 +; NO-SIMD128-NEXT: i32.and $push20=, $10, $pop68 +; NO-SIMD128-NEXT: i32.const $push67=, 255 +; NO-SIMD128-NEXT: i32.and $push19=, $26, $pop67 +; NO-SIMD128-NEXT: i32.shr_u $push21=, $pop20, $pop19 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop21 +; NO-SIMD128-NEXT: i32.const $push66=, 255 +; NO-SIMD128-NEXT: i32.and $push23=, $9, $pop66 +; 
NO-SIMD128-NEXT: i32.const $push65=, 255 +; NO-SIMD128-NEXT: i32.and $push22=, $25, $pop65 +; NO-SIMD128-NEXT: i32.shr_u $push24=, $pop23, $pop22 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop24 +; NO-SIMD128-NEXT: i32.const $push64=, 255 +; NO-SIMD128-NEXT: i32.and $push26=, $8, $pop64 +; NO-SIMD128-NEXT: i32.const $push63=, 255 +; NO-SIMD128-NEXT: i32.and $push25=, $24, $pop63 +; NO-SIMD128-NEXT: i32.shr_u $push27=, $pop26, $pop25 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop27 +; NO-SIMD128-NEXT: i32.const $push62=, 255 +; NO-SIMD128-NEXT: i32.and $push29=, $7, $pop62 +; NO-SIMD128-NEXT: i32.const $push61=, 255 +; NO-SIMD128-NEXT: i32.and $push28=, $23, $pop61 +; NO-SIMD128-NEXT: i32.shr_u $push30=, $pop29, $pop28 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop30 +; NO-SIMD128-NEXT: i32.const $push60=, 255 +; NO-SIMD128-NEXT: i32.and $push32=, $6, $pop60 +; NO-SIMD128-NEXT: i32.const $push59=, 255 +; NO-SIMD128-NEXT: i32.and $push31=, $22, $pop59 +; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop31 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop33 +; NO-SIMD128-NEXT: i32.const $push58=, 255 +; NO-SIMD128-NEXT: i32.and $push35=, $5, $pop58 +; NO-SIMD128-NEXT: i32.const $push57=, 255 +; NO-SIMD128-NEXT: i32.and $push34=, $21, $pop57 +; NO-SIMD128-NEXT: i32.shr_u $push36=, $pop35, $pop34 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop36 +; NO-SIMD128-NEXT: i32.const $push56=, 255 +; NO-SIMD128-NEXT: i32.and $push38=, $4, $pop56 +; NO-SIMD128-NEXT: i32.const $push55=, 255 +; NO-SIMD128-NEXT: i32.and $push37=, $20, $pop55 +; NO-SIMD128-NEXT: i32.shr_u $push39=, $pop38, $pop37 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop39 +; NO-SIMD128-NEXT: i32.const $push54=, 255 +; NO-SIMD128-NEXT: i32.and $push41=, $3, $pop54 +; NO-SIMD128-NEXT: i32.const $push53=, 255 +; NO-SIMD128-NEXT: i32.and $push40=, $19, $pop53 +; NO-SIMD128-NEXT: i32.shr_u $push42=, $pop41, $pop40 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop42 +; NO-SIMD128-NEXT: i32.const $push52=, 255 +; NO-SIMD128-NEXT: i32.and $push44=, $2, 
$pop52 +; NO-SIMD128-NEXT: i32.const $push51=, 255 +; NO-SIMD128-NEXT: i32.and $push43=, $18, $pop51 +; NO-SIMD128-NEXT: i32.shr_u $push45=, $pop44, $pop43 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop45 +; NO-SIMD128-NEXT: i32.const $push50=, 255 +; NO-SIMD128-NEXT: i32.and $push47=, $1, $pop50 +; NO-SIMD128-NEXT: i32.const $push49=, 255 +; NO-SIMD128-NEXT: i32.and $push46=, $17, $pop49 +; NO-SIMD128-NEXT: i32.shr_u $push48=, $pop47, $pop46 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop48 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_u_vec_v16i8: @@ -4564,122 +3794,100 @@ define <16 x i8> @shr_u_vec_v16i8(<16 x i8> %v, <16 x i8> %x) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push101=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop101 +; NO-SIMD128-FAST-NEXT: i32.const $push79=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop79 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push100=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop100 -; NO-SIMD128-FAST-NEXT: i32.const $push99=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, $pop99 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $pop4 -; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push98=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop98 -; NO-SIMD128-FAST-NEXT: i32.const $push97=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop97 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop7 -; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push96=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop96 -; NO-SIMD128-FAST-NEXT: i32.const $push95=, 255 -; NO-SIMD128-FAST-NEXT: 
i32.and $push10=, $20, $pop95 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $pop10 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push94=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push16=, $5, $pop94 -; NO-SIMD128-FAST-NEXT: i32.const $push93=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $21, $pop93 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push92=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $6, $pop92 -; NO-SIMD128-FAST-NEXT: i32.const $push91=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push18=, $22, $pop91 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push20=, $pop19, $pop18 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push90=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push24=, $7, $pop90 -; NO-SIMD128-FAST-NEXT: i32.const $push89=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $23, $pop89 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop23 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push31=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push88=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $8, $pop88 -; NO-SIMD128-FAST-NEXT: i32.const $push87=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push28=, $24, $pop87 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push30=, $pop29, $pop28 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop32), $pop30 -; NO-SIMD128-FAST-NEXT: i32.const $push86=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push34=, $9, $pop86 -; NO-SIMD128-FAST-NEXT: i32.const $push85=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push33=, $25, $pop85 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push35=, $pop34, $pop33 -; 
NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.const $push84=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push37=, $10, $pop84 -; NO-SIMD128-FAST-NEXT: i32.const $push83=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push36=, $26, $pop83 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push38=, $pop37, $pop36 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop38 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push45=, $0, $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push82=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push42=, $11, $pop82 -; NO-SIMD128-FAST-NEXT: i32.const $push81=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push41=, $27, $pop81 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push43=, $pop42, $pop41 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop45), $pop43 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-FAST-NEXT: i32.const $push80=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push47=, $12, $pop80 -; NO-SIMD128-FAST-NEXT: i32.const $push79=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push46=, $28, $pop79 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push48=, $pop47, $pop46 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-FAST-NEXT: i32.const $push54=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push55=, $0, $pop54 ; NO-SIMD128-FAST-NEXT: i32.const $push78=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push52=, $13, $pop78 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop78 ; NO-SIMD128-FAST-NEXT: i32.const $push77=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push51=, $29, $pop77 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push53=, $pop52, $pop51 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop55), $pop53 -; NO-SIMD128-FAST-NEXT: i32.const $push59=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push60=, $0, $pop59 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, $pop77 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, 
$pop4 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.const $push76=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push57=, $14, $pop76 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop76 ; NO-SIMD128-FAST-NEXT: i32.const $push75=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push56=, $30, $pop75 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push58=, $pop57, $pop56 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop60), $pop58 -; NO-SIMD128-FAST-NEXT: i32.const $push64=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push65=, $0, $pop64 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop75 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop9 ; NO-SIMD128-FAST-NEXT: i32.const $push74=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push62=, $15, $pop74 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop74 ; NO-SIMD128-FAST-NEXT: i32.const $push73=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push61=, $31, $pop73 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push63=, $pop62, $pop61 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop65), $pop63 -; NO-SIMD128-FAST-NEXT: i32.const $push69=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push70=, $0, $pop69 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $20, $pop73 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop12 ; NO-SIMD128-FAST-NEXT: i32.const $push72=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push67=, $16, $pop72 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $5, $pop72 ; NO-SIMD128-FAST-NEXT: i32.const $push71=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push66=, $32, $pop71 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push68=, $pop67, $pop66 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop70), $pop68 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $21, $pop71 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push15=, $pop14, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.const $push70=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $6, $pop70 +; NO-SIMD128-FAST-NEXT: 
i32.const $push69=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $22, $pop69 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push18=, $pop17, $pop16 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push68=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $7, $pop68 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $23, $pop67 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push21=, $pop20, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $8, $pop66 +; NO-SIMD128-FAST-NEXT: i32.const $push65=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $24, $pop65 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push24=, $pop23, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push64=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push26=, $9, $pop64 +; NO-SIMD128-FAST-NEXT: i32.const $push63=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $25, $pop63 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push27=, $pop26, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop27 +; NO-SIMD128-FAST-NEXT: i32.const $push62=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $10, $pop62 +; NO-SIMD128-FAST-NEXT: i32.const $push61=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $26, $pop61 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push30=, $pop29, $pop28 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push60=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push32=, $11, $pop60 +; NO-SIMD128-FAST-NEXT: i32.const $push59=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $27, $pop59 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push33=, $pop32, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop33 +; NO-SIMD128-FAST-NEXT: i32.const $push58=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push35=, $12, $pop58 +; NO-SIMD128-FAST-NEXT: i32.const $push57=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push34=, $28, $pop57 +; 
NO-SIMD128-FAST-NEXT: i32.shr_u $push36=, $pop35, $pop34 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push56=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push38=, $13, $pop56 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $29, $pop55 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push39=, $pop38, $pop37 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop39 +; NO-SIMD128-FAST-NEXT: i32.const $push54=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push41=, $14, $pop54 +; NO-SIMD128-FAST-NEXT: i32.const $push53=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push40=, $30, $pop53 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push42=, $pop41, $pop40 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop42 +; NO-SIMD128-FAST-NEXT: i32.const $push52=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push44=, $15, $pop52 +; NO-SIMD128-FAST-NEXT: i32.const $push51=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push43=, $31, $pop51 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push45=, $pop44, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop45 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push47=, $16, $pop50 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push46=, $32, $pop49 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push48=, $pop47, $pop46 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop48 ; NO-SIMD128-FAST-NEXT: return %a = lshr <16 x i8> %v, %x ret <16 x i8> %a @@ -4701,60 +3909,38 @@ define <16 x i8> @and_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: and_v16i8: ; NO-SIMD128: .functype and_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.and $push0=, $9, $25 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop0 -; NO-SIMD128-NEXT: i32.and $push1=, $5, $21 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop1 -; 
NO-SIMD128-NEXT: i32.and $push2=, $3, $19 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-NEXT: i32.and $push3=, $2, $18 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop3 -; NO-SIMD128-NEXT: i32.and $push4=, $1, $17 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push6=, 15 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.and $push5=, $16, $32 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5 -; NO-SIMD128-NEXT: i32.const $push9=, 14 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.and $push8=, $15, $31 -; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 13 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.and $push11=, $14, $30 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 12 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.and $push14=, $13, $29 -; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push18=, 11 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.and $push17=, $12, $28 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17 -; NO-SIMD128-NEXT: i32.const $push21=, 10 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.and $push20=, $11, $27 -; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push24=, 9 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.and $push23=, $10, $26 -; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 -; NO-SIMD128-NEXT: i32.const $push27=, 7 -; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-NEXT: i32.and $push26=, $8, $24 -; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26 -; NO-SIMD128-NEXT: i32.const $push30=, 6 -; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-NEXT: i32.and $push29=, $7, $23 -; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29 -; NO-SIMD128-NEXT: i32.const $push33=, 5 -; 
NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.and $push32=, $6, $22 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.const $push36=, 3 -; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-NEXT: i32.and $push35=, $4, $20 -; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35 +; NO-SIMD128-NEXT: i32.and $push0=, $16, $32 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop0 +; NO-SIMD128-NEXT: i32.and $push1=, $15, $31 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop1 +; NO-SIMD128-NEXT: i32.and $push2=, $14, $30 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop2 +; NO-SIMD128-NEXT: i32.and $push3=, $13, $29 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop3 +; NO-SIMD128-NEXT: i32.and $push4=, $12, $28 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop4 +; NO-SIMD128-NEXT: i32.and $push5=, $11, $27 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop5 +; NO-SIMD128-NEXT: i32.and $push6=, $10, $26 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop6 +; NO-SIMD128-NEXT: i32.and $push7=, $9, $25 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop7 +; NO-SIMD128-NEXT: i32.and $push8=, $8, $24 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop8 +; NO-SIMD128-NEXT: i32.and $push9=, $7, $23 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop9 +; NO-SIMD128-NEXT: i32.and $push10=, $6, $22 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop10 +; NO-SIMD128-NEXT: i32.and $push11=, $5, $21 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop11 +; NO-SIMD128-NEXT: i32.and $push12=, $4, $20 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop12 +; NO-SIMD128-NEXT: i32.and $push13=, $3, $19 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop13 +; NO-SIMD128-NEXT: i32.and $push14=, $2, $18 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop14 +; NO-SIMD128-NEXT: i32.and $push15=, $1, $17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop15 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: and_v16i8: @@ -4766,54 +3952,32 @@ define <16 x i8> @and_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 ; NO-SIMD128-FAST-NEXT: 
i32.and $push2=, $3, $19 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $4, $20 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.and $push6=, $5, $21 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.and $push9=, $6, $22 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $7, $23 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $8, $24 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 -; NO-SIMD128-FAST-NEXT: i32.and $push16=, $9, $25 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $10, $26 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-FAST-NEXT: i32.and $push22=, $11, $27 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $12, $28 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.and $push28=, $13, $29 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 -; NO-SIMD128-FAST-NEXT: i32.add 
$push30=, $0, $pop29 -; NO-SIMD128-FAST-NEXT: i32.and $push31=, $14, $30 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.and $push34=, $15, $31 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.and $push37=, $16, $32 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop15 ; NO-SIMD128-FAST-NEXT: return %a = and <16 x i8> %x, %y ret <16 x i8> %a @@ -4835,60 +3999,38 @@ define <16 x i8> @or_v16i8(<16 x i8> %x, <16 x 
i8> %y) { ; NO-SIMD128-LABEL: or_v16i8: ; NO-SIMD128: .functype or_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.or $push0=, $9, $25 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop0 -; NO-SIMD128-NEXT: i32.or $push1=, $5, $21 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop1 -; NO-SIMD128-NEXT: i32.or $push2=, $3, $19 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-NEXT: i32.or $push3=, $2, $18 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop3 -; NO-SIMD128-NEXT: i32.or $push4=, $1, $17 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push6=, 15 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.or $push5=, $16, $32 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5 -; NO-SIMD128-NEXT: i32.const $push9=, 14 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.or $push8=, $15, $31 -; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 13 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.or $push11=, $14, $30 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 12 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.or $push14=, $13, $29 -; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push18=, 11 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.or $push17=, $12, $28 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17 -; NO-SIMD128-NEXT: i32.const $push21=, 10 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.or $push20=, $11, $27 -; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push24=, 9 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.or $push23=, $10, $26 -; NO-SIMD128-NEXT: i32.store8 
0($pop25), $pop23 -; NO-SIMD128-NEXT: i32.const $push27=, 7 -; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-NEXT: i32.or $push26=, $8, $24 -; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26 -; NO-SIMD128-NEXT: i32.const $push30=, 6 -; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-NEXT: i32.or $push29=, $7, $23 -; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29 -; NO-SIMD128-NEXT: i32.const $push33=, 5 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.or $push32=, $6, $22 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.const $push36=, 3 -; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-NEXT: i32.or $push35=, $4, $20 -; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35 +; NO-SIMD128-NEXT: i32.or $push0=, $16, $32 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop0 +; NO-SIMD128-NEXT: i32.or $push1=, $15, $31 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop1 +; NO-SIMD128-NEXT: i32.or $push2=, $14, $30 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop2 +; NO-SIMD128-NEXT: i32.or $push3=, $13, $29 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop3 +; NO-SIMD128-NEXT: i32.or $push4=, $12, $28 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop4 +; NO-SIMD128-NEXT: i32.or $push5=, $11, $27 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop5 +; NO-SIMD128-NEXT: i32.or $push6=, $10, $26 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop6 +; NO-SIMD128-NEXT: i32.or $push7=, $9, $25 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop7 +; NO-SIMD128-NEXT: i32.or $push8=, $8, $24 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop8 +; NO-SIMD128-NEXT: i32.or $push9=, $7, $23 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop9 +; NO-SIMD128-NEXT: i32.or $push10=, $6, $22 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop10 +; NO-SIMD128-NEXT: i32.or $push11=, $5, $21 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop11 +; NO-SIMD128-NEXT: i32.or $push12=, $4, $20 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop12 +; NO-SIMD128-NEXT: i32.or $push13=, $3, $19 +; NO-SIMD128-NEXT: i32.store8 
2($0), $pop13 +; NO-SIMD128-NEXT: i32.or $push14=, $2, $18 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop14 +; NO-SIMD128-NEXT: i32.or $push15=, $1, $17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop15 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: or_v16i8: @@ -4900,54 +4042,32 @@ define <16 x i8> @or_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.or $push2=, $3, $19 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.or $push5=, $4, $20 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.or $push6=, $5, $21 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.or $push9=, $6, $22 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.or $push12=, $7, $23 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.or $push15=, $8, $24 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 -; NO-SIMD128-FAST-NEXT: i32.or $push16=, $9, $25 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.or $push19=, $10, $26 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-FAST-NEXT: i32.or $push22=, $11, $27 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, 
$pop23 -; NO-SIMD128-FAST-NEXT: i32.or $push25=, $12, $28 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.or $push28=, $13, $29 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-FAST-NEXT: i32.or $push31=, $14, $30 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.or $push34=, $15, $31 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.or $push37=, $16, $32 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: i32.or $push3=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.or $push4=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.or $push5=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.or $push6=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.or $push7=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.or $push8=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.or $push9=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.or $push10=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.or $push11=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.or $push12=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.or $push13=, $14, $30 +; 
NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.or $push14=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.or $push15=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop15 ; NO-SIMD128-FAST-NEXT: return %a = or <16 x i8> %x, %y ret <16 x i8> %a @@ -4969,60 +4089,38 @@ define <16 x i8> @xor_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: xor_v16i8: ; NO-SIMD128: .functype xor_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.xor $push0=, $9, $25 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop0 -; NO-SIMD128-NEXT: i32.xor $push1=, $5, $21 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop1 -; NO-SIMD128-NEXT: i32.xor $push2=, $3, $19 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-NEXT: i32.xor $push3=, $2, $18 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop3 -; NO-SIMD128-NEXT: i32.xor $push4=, $1, $17 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push6=, 15 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.xor $push5=, $16, $32 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5 -; NO-SIMD128-NEXT: i32.const $push9=, 14 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.xor $push8=, $15, $31 -; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 13 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.xor $push11=, $14, $30 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 12 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.xor $push14=, $13, $29 -; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push18=, 11 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.xor $push17=, $12, $28 
-; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17 -; NO-SIMD128-NEXT: i32.const $push21=, 10 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.xor $push20=, $11, $27 -; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push24=, 9 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.xor $push23=, $10, $26 -; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 -; NO-SIMD128-NEXT: i32.const $push27=, 7 -; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-NEXT: i32.xor $push26=, $8, $24 -; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26 -; NO-SIMD128-NEXT: i32.const $push30=, 6 -; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-NEXT: i32.xor $push29=, $7, $23 -; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29 -; NO-SIMD128-NEXT: i32.const $push33=, 5 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.xor $push32=, $6, $22 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.const $push36=, 3 -; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-NEXT: i32.xor $push35=, $4, $20 -; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35 +; NO-SIMD128-NEXT: i32.xor $push0=, $16, $32 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop0 +; NO-SIMD128-NEXT: i32.xor $push1=, $15, $31 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop1 +; NO-SIMD128-NEXT: i32.xor $push2=, $14, $30 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop2 +; NO-SIMD128-NEXT: i32.xor $push3=, $13, $29 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop3 +; NO-SIMD128-NEXT: i32.xor $push4=, $12, $28 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop4 +; NO-SIMD128-NEXT: i32.xor $push5=, $11, $27 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop5 +; NO-SIMD128-NEXT: i32.xor $push6=, $10, $26 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop6 +; NO-SIMD128-NEXT: i32.xor $push7=, $9, $25 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop7 +; NO-SIMD128-NEXT: i32.xor $push8=, $8, $24 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop8 +; 
NO-SIMD128-NEXT: i32.xor $push9=, $7, $23 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop9 +; NO-SIMD128-NEXT: i32.xor $push10=, $6, $22 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop10 +; NO-SIMD128-NEXT: i32.xor $push11=, $5, $21 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop11 +; NO-SIMD128-NEXT: i32.xor $push12=, $4, $20 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop12 +; NO-SIMD128-NEXT: i32.xor $push13=, $3, $19 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop13 +; NO-SIMD128-NEXT: i32.xor $push14=, $2, $18 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop14 +; NO-SIMD128-NEXT: i32.xor $push15=, $1, $17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop15 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: xor_v16i8: @@ -5034,54 +4132,32 @@ define <16 x i8> @xor_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $3, $19 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $4, $20 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $5, $21 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $6, $22 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $7, $23 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $8, $24 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 -; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $9, $25 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 -; 
NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $10, $26 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $11, $27 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-FAST-NEXT: i32.xor $push25=, $12, $28 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.xor $push28=, $13, $29 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-FAST-NEXT: i32.xor $push31=, $14, $30 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.xor $push34=, $15, $31 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.xor $push37=, $16, $32 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop8 
+; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop15 ; NO-SIMD128-FAST-NEXT: return %a = xor <16 x i8> %x, %y ret <16 x i8> %a @@ -5104,75 +4180,53 @@ define <16 x i8> @not_v16i8(<16 x i8> %x) { ; NO-SIMD128: .functype not_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, -1 -; NO-SIMD128-NEXT: i32.xor $push1=, $9, $pop0 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop1 -; NO-SIMD128-NEXT: i32.const $push53=, -1 -; NO-SIMD128-NEXT: i32.xor $push2=, $5, $pop53 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push52=, -1 -; NO-SIMD128-NEXT: i32.xor $push3=, $3, $pop52 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push51=, -1 -; NO-SIMD128-NEXT: i32.xor $push4=, $2, $pop51 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push50=, -1 -; NO-SIMD128-NEXT: i32.xor $push5=, $1, $pop50 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop5 -; NO-SIMD128-NEXT: i32.const $push7=, 15 -; NO-SIMD128-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-NEXT: i32.const $push49=, -1 -; NO-SIMD128-NEXT: i32.xor $push6=, $16, $pop49 -; NO-SIMD128-NEXT: i32.store8 0($pop8), $pop6 -; NO-SIMD128-NEXT: i32.const $push10=, 14 -; NO-SIMD128-NEXT: i32.add $push11=, $0, 
$pop10 -; NO-SIMD128-NEXT: i32.const $push48=, -1 -; NO-SIMD128-NEXT: i32.xor $push9=, $15, $pop48 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9 -; NO-SIMD128-NEXT: i32.const $push13=, 13 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.const $push47=, -1 -; NO-SIMD128-NEXT: i32.xor $push12=, $14, $pop47 -; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.const $push16=, 12 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.const $push46=, -1 -; NO-SIMD128-NEXT: i32.xor $push15=, $13, $pop46 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.const $push19=, 11 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.const $push45=, -1 -; NO-SIMD128-NEXT: i32.xor $push18=, $12, $pop45 -; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push22=, 10 -; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22 -; NO-SIMD128-NEXT: i32.const $push44=, -1 -; NO-SIMD128-NEXT: i32.xor $push21=, $11, $pop44 -; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21 -; NO-SIMD128-NEXT: i32.const $push25=, 9 -; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 -; NO-SIMD128-NEXT: i32.const $push43=, -1 -; NO-SIMD128-NEXT: i32.xor $push24=, $10, $pop43 -; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 -; NO-SIMD128-NEXT: i32.const $push28=, 7 -; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28 -; NO-SIMD128-NEXT: i32.const $push42=, -1 -; NO-SIMD128-NEXT: i32.xor $push27=, $8, $pop42 -; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27 -; NO-SIMD128-NEXT: i32.const $push31=, 6 -; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31 -; NO-SIMD128-NEXT: i32.const $push41=, -1 -; NO-SIMD128-NEXT: i32.xor $push30=, $7, $pop41 -; NO-SIMD128-NEXT: i32.store8 0($pop32), $pop30 -; NO-SIMD128-NEXT: i32.const $push34=, 5 -; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 -; NO-SIMD128-NEXT: i32.const $push40=, -1 -; NO-SIMD128-NEXT: i32.xor $push33=, $6, $pop40 -; NO-SIMD128-NEXT: i32.store8 
0($pop35), $pop33 -; NO-SIMD128-NEXT: i32.const $push37=, 3 -; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 -; NO-SIMD128-NEXT: i32.const $push39=, -1 -; NO-SIMD128-NEXT: i32.xor $push36=, $4, $pop39 -; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 +; NO-SIMD128-NEXT: i32.xor $push1=, $16, $pop0 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop1 +; NO-SIMD128-NEXT: i32.const $push31=, -1 +; NO-SIMD128-NEXT: i32.xor $push2=, $15, $pop31 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push30=, -1 +; NO-SIMD128-NEXT: i32.xor $push3=, $14, $pop30 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push29=, -1 +; NO-SIMD128-NEXT: i32.xor $push4=, $13, $pop29 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push28=, -1 +; NO-SIMD128-NEXT: i32.xor $push5=, $12, $pop28 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop5 +; NO-SIMD128-NEXT: i32.const $push27=, -1 +; NO-SIMD128-NEXT: i32.xor $push6=, $11, $pop27 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push26=, -1 +; NO-SIMD128-NEXT: i32.xor $push7=, $10, $pop26 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop7 +; NO-SIMD128-NEXT: i32.const $push25=, -1 +; NO-SIMD128-NEXT: i32.xor $push8=, $9, $pop25 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push24=, -1 +; NO-SIMD128-NEXT: i32.xor $push9=, $8, $pop24 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop9 +; NO-SIMD128-NEXT: i32.const $push23=, -1 +; NO-SIMD128-NEXT: i32.xor $push10=, $7, $pop23 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push22=, -1 +; NO-SIMD128-NEXT: i32.xor $push11=, $6, $pop22 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop11 +; NO-SIMD128-NEXT: i32.const $push21=, -1 +; NO-SIMD128-NEXT: i32.xor $push12=, $5, $pop21 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push20=, -1 +; NO-SIMD128-NEXT: i32.xor $push13=, $4, $pop20 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop13 +; 
NO-SIMD128-NEXT: i32.const $push19=, -1 +; NO-SIMD128-NEXT: i32.xor $push14=, $3, $pop19 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop14 +; NO-SIMD128-NEXT: i32.const $push18=, -1 +; NO-SIMD128-NEXT: i32.xor $push15=, $2, $pop18 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop15 +; NO-SIMD128-NEXT: i32.const $push17=, -1 +; NO-SIMD128-NEXT: i32.xor $push16=, $1, $pop17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop16 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: not_v16i8: @@ -5181,73 +4235,51 @@ define <16 x i8> @not_v16i8(<16 x i8> %x) { ; NO-SIMD128-FAST-NEXT: i32.const $push0=, -1 ; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop0 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: i32.const $push53=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop53 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop31 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push52=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop52 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop30 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push51=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $4, $pop51 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop5), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push50=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $5, $pop50 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $6, $pop49 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop9), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push11=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push48=, -1 -; 
NO-SIMD128-FAST-NEXT: i32.xor $push13=, $7, $pop48 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop12), $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push14=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $8, $pop47 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop15), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $9, $pop46 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push18=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $10, $pop45 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop19), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $11, $pop44 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop23 -; NO-SIMD128-FAST-NEXT: i32.const $push24=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $12, $pop43 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop25), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.const $push42=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $13, $pop42 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop29 -; NO-SIMD128-FAST-NEXT: i32.const $push30=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $14, $pop41 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.const $push40=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor 
$push35=, $15, $pop40 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push36=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $16, $pop39 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $4, $pop29 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push28=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $5, $pop28 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $6, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $7, $pop26 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push25=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $8, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push24=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $9, $pop24 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $10, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $11, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $12, $pop21 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $13, $pop20 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $14, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), 
$pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $15, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $16, $pop17 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop16 ; NO-SIMD128-FAST-NEXT: return %a = xor <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, @@ -5274,91 +4306,69 @@ define <16 x i8> @andnot_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128: .functype andnot_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, -1 -; NO-SIMD128-NEXT: i32.xor $push1=, $25, $pop0 -; NO-SIMD128-NEXT: i32.and $push2=, $9, $pop1 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push69=, -1 -; NO-SIMD128-NEXT: i32.xor $push3=, $21, $pop69 -; NO-SIMD128-NEXT: i32.and $push4=, $5, $pop3 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push68=, -1 -; NO-SIMD128-NEXT: i32.xor $push5=, $19, $pop68 -; NO-SIMD128-NEXT: i32.and $push6=, $3, $pop5 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push67=, -1 -; NO-SIMD128-NEXT: i32.xor $push7=, $18, $pop67 -; NO-SIMD128-NEXT: i32.and $push8=, $2, $pop7 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop8 -; NO-SIMD128-NEXT: i32.const $push66=, -1 -; NO-SIMD128-NEXT: i32.xor $push9=, $17, $pop66 -; NO-SIMD128-NEXT: i32.and $push10=, $1, $pop9 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop10 -; NO-SIMD128-NEXT: i32.const $push13=, 15 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.const $push65=, -1 -; NO-SIMD128-NEXT: i32.xor $push11=, $32, $pop65 -; NO-SIMD128-NEXT: i32.and $push12=, $16, $pop11 -; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.const $push17=, 14 -; 
NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.const $push64=, -1 -; NO-SIMD128-NEXT: i32.xor $push15=, $31, $pop64 -; NO-SIMD128-NEXT: i32.and $push16=, $15, $pop15 -; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push21=, 13 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.const $push63=, -1 -; NO-SIMD128-NEXT: i32.xor $push19=, $30, $pop63 -; NO-SIMD128-NEXT: i32.and $push20=, $14, $pop19 -; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push25=, 12 -; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 -; NO-SIMD128-NEXT: i32.const $push62=, -1 -; NO-SIMD128-NEXT: i32.xor $push23=, $29, $pop62 -; NO-SIMD128-NEXT: i32.and $push24=, $13, $pop23 -; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 -; NO-SIMD128-NEXT: i32.const $push29=, 11 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.const $push61=, -1 -; NO-SIMD128-NEXT: i32.xor $push27=, $28, $pop61 -; NO-SIMD128-NEXT: i32.and $push28=, $12, $pop27 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push33=, 10 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.const $push60=, -1 -; NO-SIMD128-NEXT: i32.xor $push31=, $27, $pop60 -; NO-SIMD128-NEXT: i32.and $push32=, $11, $pop31 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.const $push37=, 9 -; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 -; NO-SIMD128-NEXT: i32.const $push59=, -1 -; NO-SIMD128-NEXT: i32.xor $push35=, $26, $pop59 -; NO-SIMD128-NEXT: i32.and $push36=, $10, $pop35 -; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 -; NO-SIMD128-NEXT: i32.const $push41=, 7 -; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-NEXT: i32.const $push58=, -1 -; NO-SIMD128-NEXT: i32.xor $push39=, $24, $pop58 -; NO-SIMD128-NEXT: i32.and $push40=, $8, $pop39 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-NEXT: i32.const $push45=, 6 -; 
NO-SIMD128-NEXT: i32.add $push46=, $0, $pop45 -; NO-SIMD128-NEXT: i32.const $push57=, -1 -; NO-SIMD128-NEXT: i32.xor $push43=, $23, $pop57 -; NO-SIMD128-NEXT: i32.and $push44=, $7, $pop43 -; NO-SIMD128-NEXT: i32.store8 0($pop46), $pop44 -; NO-SIMD128-NEXT: i32.const $push49=, 5 -; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-NEXT: i32.const $push56=, -1 -; NO-SIMD128-NEXT: i32.xor $push47=, $22, $pop56 -; NO-SIMD128-NEXT: i32.and $push48=, $6, $pop47 -; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-NEXT: i32.const $push53=, 3 -; NO-SIMD128-NEXT: i32.add $push54=, $0, $pop53 -; NO-SIMD128-NEXT: i32.const $push55=, -1 -; NO-SIMD128-NEXT: i32.xor $push51=, $20, $pop55 -; NO-SIMD128-NEXT: i32.and $push52=, $4, $pop51 -; NO-SIMD128-NEXT: i32.store8 0($pop54), $pop52 +; NO-SIMD128-NEXT: i32.xor $push1=, $32, $pop0 +; NO-SIMD128-NEXT: i32.and $push2=, $16, $pop1 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push47=, -1 +; NO-SIMD128-NEXT: i32.xor $push3=, $31, $pop47 +; NO-SIMD128-NEXT: i32.and $push4=, $15, $pop3 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push46=, -1 +; NO-SIMD128-NEXT: i32.xor $push5=, $30, $pop46 +; NO-SIMD128-NEXT: i32.and $push6=, $14, $pop5 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push45=, -1 +; NO-SIMD128-NEXT: i32.xor $push7=, $29, $pop45 +; NO-SIMD128-NEXT: i32.and $push8=, $13, $pop7 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push44=, -1 +; NO-SIMD128-NEXT: i32.xor $push9=, $28, $pop44 +; NO-SIMD128-NEXT: i32.and $push10=, $12, $pop9 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push43=, -1 +; NO-SIMD128-NEXT: i32.xor $push11=, $27, $pop43 +; NO-SIMD128-NEXT: i32.and $push12=, $11, $pop11 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push42=, -1 +; NO-SIMD128-NEXT: i32.xor $push13=, $26, $pop42 +; NO-SIMD128-NEXT: i32.and 
$push14=, $10, $pop13 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop14 +; NO-SIMD128-NEXT: i32.const $push41=, -1 +; NO-SIMD128-NEXT: i32.xor $push15=, $25, $pop41 +; NO-SIMD128-NEXT: i32.and $push16=, $9, $pop15 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop16 +; NO-SIMD128-NEXT: i32.const $push40=, -1 +; NO-SIMD128-NEXT: i32.xor $push17=, $24, $pop40 +; NO-SIMD128-NEXT: i32.and $push18=, $8, $pop17 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop18 +; NO-SIMD128-NEXT: i32.const $push39=, -1 +; NO-SIMD128-NEXT: i32.xor $push19=, $23, $pop39 +; NO-SIMD128-NEXT: i32.and $push20=, $7, $pop19 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop20 +; NO-SIMD128-NEXT: i32.const $push38=, -1 +; NO-SIMD128-NEXT: i32.xor $push21=, $22, $pop38 +; NO-SIMD128-NEXT: i32.and $push22=, $6, $pop21 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop22 +; NO-SIMD128-NEXT: i32.const $push37=, -1 +; NO-SIMD128-NEXT: i32.xor $push23=, $21, $pop37 +; NO-SIMD128-NEXT: i32.and $push24=, $5, $pop23 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop24 +; NO-SIMD128-NEXT: i32.const $push36=, -1 +; NO-SIMD128-NEXT: i32.xor $push25=, $20, $pop36 +; NO-SIMD128-NEXT: i32.and $push26=, $4, $pop25 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop26 +; NO-SIMD128-NEXT: i32.const $push35=, -1 +; NO-SIMD128-NEXT: i32.xor $push27=, $19, $pop35 +; NO-SIMD128-NEXT: i32.and $push28=, $3, $pop27 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop28 +; NO-SIMD128-NEXT: i32.const $push34=, -1 +; NO-SIMD128-NEXT: i32.xor $push29=, $18, $pop34 +; NO-SIMD128-NEXT: i32.and $push30=, $2, $pop29 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop30 +; NO-SIMD128-NEXT: i32.const $push33=, -1 +; NO-SIMD128-NEXT: i32.xor $push31=, $17, $pop33 +; NO-SIMD128-NEXT: i32.and $push32=, $1, $pop31 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop32 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: andnot_v16i8: @@ -5368,88 +4378,66 @@ define <16 x i8> @andnot_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $17, $pop0 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, 
$pop1 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push69=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $18, $pop69 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $18, $pop47 ; NO-SIMD128-FAST-NEXT: i32.and $push4=, $2, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push68=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $19, $pop68 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $19, $pop46 ; NO-SIMD128-FAST-NEXT: i32.and $push6=, $3, $pop5 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push67=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $20, $pop67 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $4, $pop9 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push66=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $21, $pop66 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $5, $pop11 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push65=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $22, $pop65 -; NO-SIMD128-FAST-NEXT: i32.and $push16=, $6, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push64=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $23, $pop64 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $7, $pop19 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push63=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $24, 
$pop63 -; NO-SIMD128-FAST-NEXT: i32.and $push24=, $8, $pop23 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop24 -; NO-SIMD128-FAST-NEXT: i32.const $push62=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push25=, $25, $pop62 -; NO-SIMD128-FAST-NEXT: i32.and $push26=, $9, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.const $push61=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $26, $pop61 -; NO-SIMD128-FAST-NEXT: i32.and $push30=, $10, $pop29 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop30 -; NO-SIMD128-FAST-NEXT: i32.const $push31=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push60=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push33=, $27, $pop60 -; NO-SIMD128-FAST-NEXT: i32.and $push34=, $11, $pop33 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop32), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push59=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push37=, $28, $pop59 -; NO-SIMD128-FAST-NEXT: i32.and $push38=, $12, $pop37 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop38 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.const $push58=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push41=, $29, $pop58 -; NO-SIMD128-FAST-NEXT: i32.and $push42=, $13, $pop41 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop42 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push44=, $0, $pop43 -; NO-SIMD128-FAST-NEXT: i32.const $push57=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push45=, $30, $pop57 -; NO-SIMD128-FAST-NEXT: i32.and $push46=, $14, $pop45 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop44), $pop46 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push48=, $0, $pop47 -; 
NO-SIMD128-FAST-NEXT: i32.const $push56=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push49=, $31, $pop56 -; NO-SIMD128-FAST-NEXT: i32.and $push50=, $15, $pop49 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop48), $pop50 -; NO-SIMD128-FAST-NEXT: i32.const $push51=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push52=, $0, $pop51 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push53=, $32, $pop55 -; NO-SIMD128-FAST-NEXT: i32.and $push54=, $16, $pop53 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop52), $pop54 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $20, $pop45 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $4, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $21, $pop44 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $5, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $22, $pop43 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $6, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $23, $pop42 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $7, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $24, $pop41 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $8, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $25, $pop40 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $9, $pop17 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $26, $pop39 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $10, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop20 +; NO-SIMD128-FAST-NEXT: 
i32.const $push38=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $27, $pop38 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $11, $pop21 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $28, $pop37 +; NO-SIMD128-FAST-NEXT: i32.and $push24=, $12, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push25=, $29, $pop36 +; NO-SIMD128-FAST-NEXT: i32.and $push26=, $13, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push27=, $30, $pop35 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $14, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $31, $pop34 +; NO-SIMD128-FAST-NEXT: i32.and $push30=, $15, $pop29 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push31=, $32, $pop33 +; NO-SIMD128-FAST-NEXT: i32.and $push32=, $16, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop32 ; NO-SIMD128-FAST-NEXT: return %inv_y = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, @@ -5477,124 +4465,102 @@ define <16 x i8> @bitselect_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) { ; NO-SIMD128-LABEL: bitselect_v16i8: ; NO-SIMD128: .functype bitselect_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 15 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.and $push0=, $16, $32 ; NO-SIMD128-NEXT: i32.const $push1=, -1 ; 
NO-SIMD128-NEXT: i32.xor $push2=, $16, $pop1 ; NO-SIMD128-NEXT: i32.and $push3=, $pop2, $48 ; NO-SIMD128-NEXT: i32.or $push4=, $pop0, $pop3 -; NO-SIMD128-NEXT: i32.store8 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push11=, 14 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.and $push7=, $15, $31 -; NO-SIMD128-NEXT: i32.const $push101=, -1 -; NO-SIMD128-NEXT: i32.xor $push8=, $15, $pop101 -; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $47 -; NO-SIMD128-NEXT: i32.or $push10=, $pop7, $pop9 -; NO-SIMD128-NEXT: i32.store8 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push17=, 13 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.and $push13=, $14, $30 -; NO-SIMD128-NEXT: i32.const $push100=, -1 -; NO-SIMD128-NEXT: i32.xor $push14=, $14, $pop100 -; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $46 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop4 +; NO-SIMD128-NEXT: i32.and $push5=, $15, $31 +; NO-SIMD128-NEXT: i32.const $push79=, -1 +; NO-SIMD128-NEXT: i32.xor $push6=, $15, $pop79 +; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $47 +; NO-SIMD128-NEXT: i32.or $push8=, $pop5, $pop7 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop8 +; NO-SIMD128-NEXT: i32.and $push9=, $14, $30 +; NO-SIMD128-NEXT: i32.const $push78=, -1 +; NO-SIMD128-NEXT: i32.xor $push10=, $14, $pop78 +; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $46 +; NO-SIMD128-NEXT: i32.or $push12=, $pop9, $pop11 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop12 +; NO-SIMD128-NEXT: i32.and $push13=, $13, $29 +; NO-SIMD128-NEXT: i32.const $push77=, -1 +; NO-SIMD128-NEXT: i32.xor $push14=, $13, $pop77 +; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $45 ; NO-SIMD128-NEXT: i32.or $push16=, $pop13, $pop15 -; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push23=, 12 -; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-NEXT: i32.and $push19=, $13, $29 -; NO-SIMD128-NEXT: i32.const $push99=, -1 -; NO-SIMD128-NEXT: i32.xor $push20=, $13, $pop99 -; 
NO-SIMD128-NEXT: i32.and $push21=, $pop20, $45 -; NO-SIMD128-NEXT: i32.or $push22=, $pop19, $pop21 -; NO-SIMD128-NEXT: i32.store8 0($pop24), $pop22 -; NO-SIMD128-NEXT: i32.const $push29=, 11 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.and $push25=, $12, $28 -; NO-SIMD128-NEXT: i32.const $push98=, -1 -; NO-SIMD128-NEXT: i32.xor $push26=, $12, $pop98 -; NO-SIMD128-NEXT: i32.and $push27=, $pop26, $44 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop16 +; NO-SIMD128-NEXT: i32.and $push17=, $12, $28 +; NO-SIMD128-NEXT: i32.const $push76=, -1 +; NO-SIMD128-NEXT: i32.xor $push18=, $12, $pop76 +; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $44 +; NO-SIMD128-NEXT: i32.or $push20=, $pop17, $pop19 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop20 +; NO-SIMD128-NEXT: i32.and $push21=, $11, $27 +; NO-SIMD128-NEXT: i32.const $push75=, -1 +; NO-SIMD128-NEXT: i32.xor $push22=, $11, $pop75 +; NO-SIMD128-NEXT: i32.and $push23=, $pop22, $43 +; NO-SIMD128-NEXT: i32.or $push24=, $pop21, $pop23 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop24 +; NO-SIMD128-NEXT: i32.and $push25=, $10, $26 +; NO-SIMD128-NEXT: i32.const $push74=, -1 +; NO-SIMD128-NEXT: i32.xor $push26=, $10, $pop74 +; NO-SIMD128-NEXT: i32.and $push27=, $pop26, $42 ; NO-SIMD128-NEXT: i32.or $push28=, $pop25, $pop27 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push35=, 10 -; NO-SIMD128-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-NEXT: i32.and $push31=, $11, $27 -; NO-SIMD128-NEXT: i32.const $push97=, -1 -; NO-SIMD128-NEXT: i32.xor $push32=, $11, $pop97 -; NO-SIMD128-NEXT: i32.and $push33=, $pop32, $43 -; NO-SIMD128-NEXT: i32.or $push34=, $pop31, $pop33 -; NO-SIMD128-NEXT: i32.store8 0($pop36), $pop34 -; NO-SIMD128-NEXT: i32.const $push41=, 9 -; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-NEXT: i32.and $push37=, $10, $26 -; NO-SIMD128-NEXT: i32.const $push96=, -1 -; NO-SIMD128-NEXT: i32.xor $push38=, $10, $pop96 -; NO-SIMD128-NEXT: i32.and $push39=, 
$pop38, $42 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop28 +; NO-SIMD128-NEXT: i32.and $push29=, $9, $25 +; NO-SIMD128-NEXT: i32.const $push73=, -1 +; NO-SIMD128-NEXT: i32.xor $push30=, $9, $pop73 +; NO-SIMD128-NEXT: i32.and $push31=, $pop30, $41 +; NO-SIMD128-NEXT: i32.or $push32=, $pop29, $pop31 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop32 +; NO-SIMD128-NEXT: i32.and $push33=, $8, $24 +; NO-SIMD128-NEXT: i32.const $push72=, -1 +; NO-SIMD128-NEXT: i32.xor $push34=, $8, $pop72 +; NO-SIMD128-NEXT: i32.and $push35=, $pop34, $40 +; NO-SIMD128-NEXT: i32.or $push36=, $pop33, $pop35 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop36 +; NO-SIMD128-NEXT: i32.and $push37=, $7, $23 +; NO-SIMD128-NEXT: i32.const $push71=, -1 +; NO-SIMD128-NEXT: i32.xor $push38=, $7, $pop71 +; NO-SIMD128-NEXT: i32.and $push39=, $pop38, $39 ; NO-SIMD128-NEXT: i32.or $push40=, $pop37, $pop39 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-NEXT: i32.and $push43=, $9, $25 -; NO-SIMD128-NEXT: i32.const $push95=, -1 -; NO-SIMD128-NEXT: i32.xor $push44=, $9, $pop95 -; NO-SIMD128-NEXT: i32.and $push45=, $pop44, $41 -; NO-SIMD128-NEXT: i32.or $push46=, $pop43, $pop45 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop46 -; NO-SIMD128-NEXT: i32.const $push51=, 7 -; NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51 -; NO-SIMD128-NEXT: i32.and $push47=, $8, $24 -; NO-SIMD128-NEXT: i32.const $push94=, -1 -; NO-SIMD128-NEXT: i32.xor $push48=, $8, $pop94 -; NO-SIMD128-NEXT: i32.and $push49=, $pop48, $40 -; NO-SIMD128-NEXT: i32.or $push50=, $pop47, $pop49 -; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50 -; NO-SIMD128-NEXT: i32.const $push57=, 6 -; NO-SIMD128-NEXT: i32.add $push58=, $0, $pop57 -; NO-SIMD128-NEXT: i32.and $push53=, $7, $23 -; NO-SIMD128-NEXT: i32.const $push93=, -1 -; NO-SIMD128-NEXT: i32.xor $push54=, $7, $pop93 -; NO-SIMD128-NEXT: i32.and $push55=, $pop54, $39 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop40 +; NO-SIMD128-NEXT: i32.and $push41=, $6, $22 +; NO-SIMD128-NEXT: i32.const $push70=, -1 +; 
NO-SIMD128-NEXT: i32.xor $push42=, $6, $pop70 +; NO-SIMD128-NEXT: i32.and $push43=, $pop42, $38 +; NO-SIMD128-NEXT: i32.or $push44=, $pop41, $pop43 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop44 +; NO-SIMD128-NEXT: i32.and $push45=, $5, $21 +; NO-SIMD128-NEXT: i32.const $push69=, -1 +; NO-SIMD128-NEXT: i32.xor $push46=, $5, $pop69 +; NO-SIMD128-NEXT: i32.and $push47=, $pop46, $37 +; NO-SIMD128-NEXT: i32.or $push48=, $pop45, $pop47 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop48 +; NO-SIMD128-NEXT: i32.and $push49=, $4, $20 +; NO-SIMD128-NEXT: i32.const $push68=, -1 +; NO-SIMD128-NEXT: i32.xor $push50=, $4, $pop68 +; NO-SIMD128-NEXT: i32.and $push51=, $pop50, $36 +; NO-SIMD128-NEXT: i32.or $push52=, $pop49, $pop51 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop52 +; NO-SIMD128-NEXT: i32.and $push53=, $3, $19 +; NO-SIMD128-NEXT: i32.const $push67=, -1 +; NO-SIMD128-NEXT: i32.xor $push54=, $3, $pop67 +; NO-SIMD128-NEXT: i32.and $push55=, $pop54, $35 ; NO-SIMD128-NEXT: i32.or $push56=, $pop53, $pop55 -; NO-SIMD128-NEXT: i32.store8 0($pop58), $pop56 -; NO-SIMD128-NEXT: i32.const $push63=, 5 -; NO-SIMD128-NEXT: i32.add $push64=, $0, $pop63 -; NO-SIMD128-NEXT: i32.and $push59=, $6, $22 -; NO-SIMD128-NEXT: i32.const $push92=, -1 -; NO-SIMD128-NEXT: i32.xor $push60=, $6, $pop92 -; NO-SIMD128-NEXT: i32.and $push61=, $pop60, $38 -; NO-SIMD128-NEXT: i32.or $push62=, $pop59, $pop61 -; NO-SIMD128-NEXT: i32.store8 0($pop64), $pop62 -; NO-SIMD128-NEXT: i32.and $push65=, $5, $21 -; NO-SIMD128-NEXT: i32.const $push91=, -1 -; NO-SIMD128-NEXT: i32.xor $push66=, $5, $pop91 -; NO-SIMD128-NEXT: i32.and $push67=, $pop66, $37 -; NO-SIMD128-NEXT: i32.or $push68=, $pop65, $pop67 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop68 -; NO-SIMD128-NEXT: i32.const $push73=, 3 -; NO-SIMD128-NEXT: i32.add $push74=, $0, $pop73 -; NO-SIMD128-NEXT: i32.and $push69=, $4, $20 -; NO-SIMD128-NEXT: i32.const $push90=, -1 -; NO-SIMD128-NEXT: i32.xor $push70=, $4, $pop90 -; NO-SIMD128-NEXT: i32.and $push71=, $pop70, $36 -; 
NO-SIMD128-NEXT: i32.or $push72=, $pop69, $pop71 -; NO-SIMD128-NEXT: i32.store8 0($pop74), $pop72 -; NO-SIMD128-NEXT: i32.and $push75=, $3, $19 -; NO-SIMD128-NEXT: i32.const $push89=, -1 -; NO-SIMD128-NEXT: i32.xor $push76=, $3, $pop89 -; NO-SIMD128-NEXT: i32.and $push77=, $pop76, $35 -; NO-SIMD128-NEXT: i32.or $push78=, $pop75, $pop77 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop78 -; NO-SIMD128-NEXT: i32.and $push79=, $2, $18 -; NO-SIMD128-NEXT: i32.const $push88=, -1 -; NO-SIMD128-NEXT: i32.xor $push80=, $2, $pop88 -; NO-SIMD128-NEXT: i32.and $push81=, $pop80, $34 -; NO-SIMD128-NEXT: i32.or $push82=, $pop79, $pop81 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop82 -; NO-SIMD128-NEXT: i32.and $push83=, $1, $17 -; NO-SIMD128-NEXT: i32.const $push87=, -1 -; NO-SIMD128-NEXT: i32.xor $push84=, $1, $pop87 -; NO-SIMD128-NEXT: i32.and $push85=, $pop84, $33 -; NO-SIMD128-NEXT: i32.or $push86=, $pop83, $pop85 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop86 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop56 +; NO-SIMD128-NEXT: i32.and $push57=, $2, $18 +; NO-SIMD128-NEXT: i32.const $push66=, -1 +; NO-SIMD128-NEXT: i32.xor $push58=, $2, $pop66 +; NO-SIMD128-NEXT: i32.and $push59=, $pop58, $34 +; NO-SIMD128-NEXT: i32.or $push60=, $pop57, $pop59 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop60 +; NO-SIMD128-NEXT: i32.and $push61=, $1, $17 +; NO-SIMD128-NEXT: i32.const $push65=, -1 +; NO-SIMD128-NEXT: i32.xor $push62=, $1, $pop65 +; NO-SIMD128-NEXT: i32.and $push63=, $pop62, $33 +; NO-SIMD128-NEXT: i32.or $push64=, $pop61, $pop63 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop64 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: bitselect_v16i8: @@ -5607,117 +4573,95 @@ define <16 x i8> @bitselect_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) { ; NO-SIMD128-FAST-NEXT: i32.or $push4=, $pop0, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop4 ; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $18 -; NO-SIMD128-FAST-NEXT: i32.const $push101=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop101 +; 
NO-SIMD128-FAST-NEXT: i32.const $push79=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop79 ; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $34 ; NO-SIMD128-FAST-NEXT: i32.or $push8=, $pop5, $pop7 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop8 ; NO-SIMD128-FAST-NEXT: i32.and $push9=, $3, $19 -; NO-SIMD128-FAST-NEXT: i32.const $push100=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop100 +; NO-SIMD128-FAST-NEXT: i32.const $push78=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop78 ; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $35 ; NO-SIMD128-FAST-NEXT: i32.or $push12=, $pop9, $pop11 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 ; NO-SIMD128-FAST-NEXT: i32.and $push13=, $4, $20 -; NO-SIMD128-FAST-NEXT: i32.const $push99=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop99 +; NO-SIMD128-FAST-NEXT: i32.const $push77=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop77 ; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $36 ; NO-SIMD128-FAST-NEXT: i32.or $push16=, $pop13, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $5, $21 -; NO-SIMD128-FAST-NEXT: i32.const $push98=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $5, $pop98 -; NO-SIMD128-FAST-NEXT: i32.and $push21=, $pop20, $37 -; NO-SIMD128-FAST-NEXT: i32.or $push22=, $pop19, $pop21 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $6, $22 -; NO-SIMD128-FAST-NEXT: i32.const $push97=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $6, $pop97 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $38 -; NO-SIMD128-FAST-NEXT: i32.or $push26=, $pop23, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 6 -; NO-SIMD128-FAST-NEXT: i32.add 
$push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $7, $23 -; NO-SIMD128-FAST-NEXT: i32.const $push96=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $7, $pop96 -; NO-SIMD128-FAST-NEXT: i32.and $push31=, $pop30, $39 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.const $push76=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $5, $pop76 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $pop18, $37 +; NO-SIMD128-FAST-NEXT: i32.or $push20=, $pop17, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.const $push75=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $6, $pop75 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $pop22, $38 +; NO-SIMD128-FAST-NEXT: i32.or $push24=, $pop21, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.const $push74=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $7, $pop74 +; NO-SIMD128-FAST-NEXT: i32.and $push27=, $pop26, $39 +; NO-SIMD128-FAST-NEXT: i32.or $push28=, $pop25, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.const $push73=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $8, $pop73 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $pop30, $40 ; NO-SIMD128-FAST-NEXT: i32.or $push32=, $pop29, $pop31 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.and $push35=, $8, $24 -; NO-SIMD128-FAST-NEXT: i32.const $push95=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $8, $pop95 -; NO-SIMD128-FAST-NEXT: i32.and $push37=, $pop36, $40 -; NO-SIMD128-FAST-NEXT: i32.or $push38=, $pop35, $pop37 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop38 -; NO-SIMD128-FAST-NEXT: i32.and 
$push41=, $9, $25 -; NO-SIMD128-FAST-NEXT: i32.const $push94=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $9, $pop94 -; NO-SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $41 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop32 +; NO-SIMD128-FAST-NEXT: i32.and $push33=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.const $push72=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push34=, $9, $pop72 +; NO-SIMD128-FAST-NEXT: i32.and $push35=, $pop34, $41 +; NO-SIMD128-FAST-NEXT: i32.or $push36=, $pop33, $pop35 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop36 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.const $push71=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $10, $pop71 +; NO-SIMD128-FAST-NEXT: i32.and $push39=, $pop38, $42 +; NO-SIMD128-FAST-NEXT: i32.or $push40=, $pop37, $pop39 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop40 +; NO-SIMD128-FAST-NEXT: i32.and $push41=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.const $push70=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $11, $pop70 +; NO-SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $43 ; NO-SIMD128-FAST-NEXT: i32.or $push44=, $pop41, $pop43 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-FAST-NEXT: i32.and $push45=, $10, $26 -; NO-SIMD128-FAST-NEXT: i32.const $push93=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push46=, $10, $pop93 -; NO-SIMD128-FAST-NEXT: i32.and $push47=, $pop46, $42 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop44 +; NO-SIMD128-FAST-NEXT: i32.and $push45=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.const $push69=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push46=, $12, $pop69 +; NO-SIMD128-FAST-NEXT: i32.and $push47=, $pop46, $44 ; NO-SIMD128-FAST-NEXT: i32.or $push48=, $pop45, $pop47 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push56=, $0, $pop55 -; NO-SIMD128-FAST-NEXT: i32.and $push51=, 
$11, $27 -; NO-SIMD128-FAST-NEXT: i32.const $push92=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push52=, $11, $pop92 -; NO-SIMD128-FAST-NEXT: i32.and $push53=, $pop52, $43 -; NO-SIMD128-FAST-NEXT: i32.or $push54=, $pop51, $pop53 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop56), $pop54 -; NO-SIMD128-FAST-NEXT: i32.const $push61=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push62=, $0, $pop61 -; NO-SIMD128-FAST-NEXT: i32.and $push57=, $12, $28 -; NO-SIMD128-FAST-NEXT: i32.const $push91=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push58=, $12, $pop91 -; NO-SIMD128-FAST-NEXT: i32.and $push59=, $pop58, $44 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop48 +; NO-SIMD128-FAST-NEXT: i32.and $push49=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.const $push68=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push50=, $13, $pop68 +; NO-SIMD128-FAST-NEXT: i32.and $push51=, $pop50, $45 +; NO-SIMD128-FAST-NEXT: i32.or $push52=, $pop49, $pop51 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop52 +; NO-SIMD128-FAST-NEXT: i32.and $push53=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push54=, $14, $pop67 +; NO-SIMD128-FAST-NEXT: i32.and $push55=, $pop54, $46 +; NO-SIMD128-FAST-NEXT: i32.or $push56=, $pop53, $pop55 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop56 +; NO-SIMD128-FAST-NEXT: i32.and $push57=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push58=, $15, $pop66 +; NO-SIMD128-FAST-NEXT: i32.and $push59=, $pop58, $47 ; NO-SIMD128-FAST-NEXT: i32.or $push60=, $pop57, $pop59 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop62), $pop60 -; NO-SIMD128-FAST-NEXT: i32.const $push67=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push68=, $0, $pop67 -; NO-SIMD128-FAST-NEXT: i32.and $push63=, $13, $29 -; NO-SIMD128-FAST-NEXT: i32.const $push90=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push64=, $13, $pop90 -; NO-SIMD128-FAST-NEXT: i32.and $push65=, $pop64, $45 -; NO-SIMD128-FAST-NEXT: i32.or $push66=, $pop63, $pop65 -; NO-SIMD128-FAST-NEXT: 
i32.store8 0($pop68), $pop66 -; NO-SIMD128-FAST-NEXT: i32.const $push73=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push74=, $0, $pop73 -; NO-SIMD128-FAST-NEXT: i32.and $push69=, $14, $30 -; NO-SIMD128-FAST-NEXT: i32.const $push89=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push70=, $14, $pop89 -; NO-SIMD128-FAST-NEXT: i32.and $push71=, $pop70, $46 -; NO-SIMD128-FAST-NEXT: i32.or $push72=, $pop69, $pop71 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop74), $pop72 -; NO-SIMD128-FAST-NEXT: i32.const $push79=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push80=, $0, $pop79 -; NO-SIMD128-FAST-NEXT: i32.and $push75=, $15, $31 -; NO-SIMD128-FAST-NEXT: i32.const $push88=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push76=, $15, $pop88 -; NO-SIMD128-FAST-NEXT: i32.and $push77=, $pop76, $47 -; NO-SIMD128-FAST-NEXT: i32.or $push78=, $pop75, $pop77 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop80), $pop78 -; NO-SIMD128-FAST-NEXT: i32.const $push85=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push86=, $0, $pop85 -; NO-SIMD128-FAST-NEXT: i32.and $push81=, $16, $32 -; NO-SIMD128-FAST-NEXT: i32.const $push87=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push82=, $16, $pop87 -; NO-SIMD128-FAST-NEXT: i32.and $push83=, $pop82, $48 -; NO-SIMD128-FAST-NEXT: i32.or $push84=, $pop81, $pop83 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop86), $pop84 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop60 +; NO-SIMD128-FAST-NEXT: i32.and $push61=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.const $push65=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push62=, $16, $pop65 +; NO-SIMD128-FAST-NEXT: i32.and $push63=, $pop62, $48 +; NO-SIMD128-FAST-NEXT: i32.or $push64=, $pop61, $pop63 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop64 ; NO-SIMD128-FAST-NEXT: return %masked_v1 = and <16 x i8> %c, %v1 %inv_mask = xor <16 x i8> %c, @@ -5746,92 +4690,70 @@ define <16 x i8> @bitselect_xor_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2 ; NO-SIMD128-LABEL: bitselect_xor_v16i8: ; NO-SIMD128: .functype bitselect_xor_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push3=, 15 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 ; NO-SIMD128-NEXT: i32.xor $push0=, $32, $48 ; NO-SIMD128-NEXT: i32.and $push1=, $pop0, $16 ; NO-SIMD128-NEXT: i32.xor $push2=, $pop1, $48 -; NO-SIMD128-NEXT: i32.store8 0($pop4), $pop2 -; NO-SIMD128-NEXT: i32.const $push8=, 14 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.xor $push5=, $31, $47 -; NO-SIMD128-NEXT: i32.and $push6=, $pop5, $15 -; NO-SIMD128-NEXT: i32.xor $push7=, $pop6, $47 -; NO-SIMD128-NEXT: i32.store8 0($pop9), $pop7 -; NO-SIMD128-NEXT: i32.const $push13=, 13 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.xor $push10=, $30, $46 -; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $14 -; NO-SIMD128-NEXT: i32.xor $push12=, $pop11, $46 -; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.const $push18=, 12 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.xor $push15=, $29, $45 -; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $13 -; NO-SIMD128-NEXT: i32.xor $push17=, $pop16, $45 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17 -; NO-SIMD128-NEXT: i32.const $push23=, 11 -; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-NEXT: i32.xor $push20=, $28, $44 -; NO-SIMD128-NEXT: i32.and $push21=, $pop20, $12 -; NO-SIMD128-NEXT: i32.xor $push22=, $pop21, $44 -; NO-SIMD128-NEXT: i32.store8 0($pop24), $pop22 -; NO-SIMD128-NEXT: i32.const $push28=, 10 -; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28 -; NO-SIMD128-NEXT: i32.xor $push25=, $27, $43 -; NO-SIMD128-NEXT: i32.and $push26=, $pop25, $11 -; NO-SIMD128-NEXT: i32.xor $push27=, $pop26, $43 -; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27 -; NO-SIMD128-NEXT: i32.const $push33=, 9 -; 
NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.xor $push30=, $26, $42 -; NO-SIMD128-NEXT: i32.and $push31=, $pop30, $10 -; NO-SIMD128-NEXT: i32.xor $push32=, $pop31, $42 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.xor $push35=, $25, $41 -; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $9 -; NO-SIMD128-NEXT: i32.xor $push37=, $pop36, $41 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop37 -; NO-SIMD128-NEXT: i32.const $push41=, 7 -; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-NEXT: i32.xor $push38=, $24, $40 -; NO-SIMD128-NEXT: i32.and $push39=, $pop38, $8 -; NO-SIMD128-NEXT: i32.xor $push40=, $pop39, $40 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-NEXT: i32.const $push46=, 6 -; NO-SIMD128-NEXT: i32.add $push47=, $0, $pop46 -; NO-SIMD128-NEXT: i32.xor $push43=, $23, $39 -; NO-SIMD128-NEXT: i32.and $push44=, $pop43, $7 -; NO-SIMD128-NEXT: i32.xor $push45=, $pop44, $39 -; NO-SIMD128-NEXT: i32.store8 0($pop47), $pop45 -; NO-SIMD128-NEXT: i32.const $push51=, 5 -; NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51 -; NO-SIMD128-NEXT: i32.xor $push48=, $22, $38 -; NO-SIMD128-NEXT: i32.and $push49=, $pop48, $6 -; NO-SIMD128-NEXT: i32.xor $push50=, $pop49, $38 -; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50 -; NO-SIMD128-NEXT: i32.xor $push53=, $21, $37 -; NO-SIMD128-NEXT: i32.and $push54=, $pop53, $5 -; NO-SIMD128-NEXT: i32.xor $push55=, $pop54, $37 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop55 -; NO-SIMD128-NEXT: i32.const $push59=, 3 -; NO-SIMD128-NEXT: i32.add $push60=, $0, $pop59 -; NO-SIMD128-NEXT: i32.xor $push56=, $20, $36 -; NO-SIMD128-NEXT: i32.and $push57=, $pop56, $4 -; NO-SIMD128-NEXT: i32.xor $push58=, $pop57, $36 -; NO-SIMD128-NEXT: i32.store8 0($pop60), $pop58 -; NO-SIMD128-NEXT: i32.xor $push61=, $19, $35 -; NO-SIMD128-NEXT: i32.and $push62=, $pop61, $3 -; NO-SIMD128-NEXT: i32.xor $push63=, $pop62, $35 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop63 -; NO-SIMD128-NEXT: i32.xor $push64=, 
$18, $34 -; NO-SIMD128-NEXT: i32.and $push65=, $pop64, $2 -; NO-SIMD128-NEXT: i32.xor $push66=, $pop65, $34 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop66 -; NO-SIMD128-NEXT: i32.xor $push67=, $17, $33 -; NO-SIMD128-NEXT: i32.and $push68=, $pop67, $1 -; NO-SIMD128-NEXT: i32.xor $push69=, $pop68, $33 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop69 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop2 +; NO-SIMD128-NEXT: i32.xor $push3=, $31, $47 +; NO-SIMD128-NEXT: i32.and $push4=, $pop3, $15 +; NO-SIMD128-NEXT: i32.xor $push5=, $pop4, $47 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop5 +; NO-SIMD128-NEXT: i32.xor $push6=, $30, $46 +; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $14 +; NO-SIMD128-NEXT: i32.xor $push8=, $pop7, $46 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop8 +; NO-SIMD128-NEXT: i32.xor $push9=, $29, $45 +; NO-SIMD128-NEXT: i32.and $push10=, $pop9, $13 +; NO-SIMD128-NEXT: i32.xor $push11=, $pop10, $45 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop11 +; NO-SIMD128-NEXT: i32.xor $push12=, $28, $44 +; NO-SIMD128-NEXT: i32.and $push13=, $pop12, $12 +; NO-SIMD128-NEXT: i32.xor $push14=, $pop13, $44 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop14 +; NO-SIMD128-NEXT: i32.xor $push15=, $27, $43 +; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $11 +; NO-SIMD128-NEXT: i32.xor $push17=, $pop16, $43 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop17 +; NO-SIMD128-NEXT: i32.xor $push18=, $26, $42 +; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $10 +; NO-SIMD128-NEXT: i32.xor $push20=, $pop19, $42 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop20 +; NO-SIMD128-NEXT: i32.xor $push21=, $25, $41 +; NO-SIMD128-NEXT: i32.and $push22=, $pop21, $9 +; NO-SIMD128-NEXT: i32.xor $push23=, $pop22, $41 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop23 +; NO-SIMD128-NEXT: i32.xor $push24=, $24, $40 +; NO-SIMD128-NEXT: i32.and $push25=, $pop24, $8 +; NO-SIMD128-NEXT: i32.xor $push26=, $pop25, $40 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop26 +; NO-SIMD128-NEXT: i32.xor $push27=, $23, $39 +; NO-SIMD128-NEXT: i32.and $push28=, 
$pop27, $7 +; NO-SIMD128-NEXT: i32.xor $push29=, $pop28, $39 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop29 +; NO-SIMD128-NEXT: i32.xor $push30=, $22, $38 +; NO-SIMD128-NEXT: i32.and $push31=, $pop30, $6 +; NO-SIMD128-NEXT: i32.xor $push32=, $pop31, $38 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop32 +; NO-SIMD128-NEXT: i32.xor $push33=, $21, $37 +; NO-SIMD128-NEXT: i32.and $push34=, $pop33, $5 +; NO-SIMD128-NEXT: i32.xor $push35=, $pop34, $37 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop35 +; NO-SIMD128-NEXT: i32.xor $push36=, $20, $36 +; NO-SIMD128-NEXT: i32.and $push37=, $pop36, $4 +; NO-SIMD128-NEXT: i32.xor $push38=, $pop37, $36 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop38 +; NO-SIMD128-NEXT: i32.xor $push39=, $19, $35 +; NO-SIMD128-NEXT: i32.and $push40=, $pop39, $3 +; NO-SIMD128-NEXT: i32.xor $push41=, $pop40, $35 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop41 +; NO-SIMD128-NEXT: i32.xor $push42=, $18, $34 +; NO-SIMD128-NEXT: i32.and $push43=, $pop42, $2 +; NO-SIMD128-NEXT: i32.xor $push44=, $pop43, $34 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop44 +; NO-SIMD128-NEXT: i32.xor $push45=, $17, $33 +; NO-SIMD128-NEXT: i32.and $push46=, $pop45, $1 +; NO-SIMD128-NEXT: i32.xor $push47=, $pop46, $33 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop47 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: bitselect_xor_v16i8: @@ -5849,80 +4771,58 @@ define <16 x i8> @bitselect_xor_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2 ; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $3 ; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $35 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $20, $36 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $4 -; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $pop12, $36 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop10), $pop13 -; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $21, $37 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, 
$5 -; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $pop15, $37 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $22, $38 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $pop19, $6 -; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $pop20, $38 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push22=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push23=, $0, $pop22 -; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $23, $39 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $7 -; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $pop25, $39 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop23), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $24, $40 -; NO-SIMD128-FAST-NEXT: i32.and $push30=, $pop29, $8 -; NO-SIMD128-FAST-NEXT: i32.xor $push31=, $pop30, $40 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop31 -; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $25, $41 -; NO-SIMD128-FAST-NEXT: i32.and $push33=, $pop32, $9 -; NO-SIMD128-FAST-NEXT: i32.xor $push34=, $pop33, $41 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.xor $push37=, $26, $42 -; NO-SIMD128-FAST-NEXT: i32.and $push38=, $pop37, $10 -; NO-SIMD128-FAST-NEXT: i32.xor $push39=, $pop38, $42 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop39 -; NO-SIMD128-FAST-NEXT: i32.const $push40=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push41=, $0, $pop40 -; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $27, $43 -; NO-SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $11 -; NO-SIMD128-FAST-NEXT: i32.xor $push44=, $pop43, $43 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop41), $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push46=, $0, 
$pop45 -; NO-SIMD128-FAST-NEXT: i32.xor $push47=, $28, $44 -; NO-SIMD128-FAST-NEXT: i32.and $push48=, $pop47, $12 -; NO-SIMD128-FAST-NEXT: i32.xor $push49=, $pop48, $44 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop46), $pop49 -; NO-SIMD128-FAST-NEXT: i32.const $push50=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push51=, $0, $pop50 -; NO-SIMD128-FAST-NEXT: i32.xor $push52=, $29, $45 -; NO-SIMD128-FAST-NEXT: i32.and $push53=, $pop52, $13 -; NO-SIMD128-FAST-NEXT: i32.xor $push54=, $pop53, $45 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop51), $pop54 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push56=, $0, $pop55 -; NO-SIMD128-FAST-NEXT: i32.xor $push57=, $30, $46 -; NO-SIMD128-FAST-NEXT: i32.and $push58=, $pop57, $14 -; NO-SIMD128-FAST-NEXT: i32.xor $push59=, $pop58, $46 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop56), $pop59 -; NO-SIMD128-FAST-NEXT: i32.const $push60=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push61=, $0, $pop60 -; NO-SIMD128-FAST-NEXT: i32.xor $push62=, $31, $47 -; NO-SIMD128-FAST-NEXT: i32.and $push63=, $pop62, $15 -; NO-SIMD128-FAST-NEXT: i32.xor $push64=, $pop63, $47 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop61), $pop64 -; NO-SIMD128-FAST-NEXT: i32.const $push65=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push66=, $0, $pop65 -; NO-SIMD128-FAST-NEXT: i32.xor $push67=, $32, $48 -; NO-SIMD128-FAST-NEXT: i32.and $push68=, $pop67, $16 -; NO-SIMD128-FAST-NEXT: i32.xor $push69=, $pop68, $48 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop66), $pop69 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $20, $36 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $pop9, $4 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $pop10, $36 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $21, $37 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $pop12, $5 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $pop13, $37 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $22, $38 +; NO-SIMD128-FAST-NEXT: i32.and 
$push16=, $pop15, $6 +; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $pop16, $38 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $23, $39 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $pop18, $7 +; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $pop19, $39 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $24, $40 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $8 +; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $pop22, $40 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop23 +; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $25, $41 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $9 +; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $pop25, $41 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop26 +; NO-SIMD128-FAST-NEXT: i32.xor $push27=, $26, $42 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $10 +; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $pop28, $42 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop29 +; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $27, $43 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $pop30, $11 +; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $pop31, $43 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop32 +; NO-SIMD128-FAST-NEXT: i32.xor $push33=, $28, $44 +; NO-SIMD128-FAST-NEXT: i32.and $push34=, $pop33, $12 +; NO-SIMD128-FAST-NEXT: i32.xor $push35=, $pop34, $44 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop35 +; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $29, $45 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $pop36, $13 +; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $pop37, $45 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop38 +; NO-SIMD128-FAST-NEXT: i32.xor $push39=, $30, $46 +; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $14 +; NO-SIMD128-FAST-NEXT: i32.xor $push41=, $pop40, $46 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop41 +; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $31, $47 +; NO-SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $15 +; NO-SIMD128-FAST-NEXT: i32.xor $push44=, $pop43, $47 +; 
NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop44 +; NO-SIMD128-FAST-NEXT: i32.xor $push45=, $32, $48 +; NO-SIMD128-FAST-NEXT: i32.and $push46=, $pop45, $16 +; NO-SIMD128-FAST-NEXT: i32.xor $push47=, $pop46, $48 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop47 ; NO-SIMD128-FAST-NEXT: return %xor1 = xor <16 x i8> %v1, %v2 %and = and <16 x i8> %xor1, %c @@ -5949,124 +4849,102 @@ define <16 x i8> @bitselect_xor_reversed_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 ; NO-SIMD128-LABEL: bitselect_xor_reversed_v16i8: ; NO-SIMD128: .functype bitselect_xor_reversed_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 15 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.xor $push2=, $32, $48 ; NO-SIMD128-NEXT: i32.const $push0=, -1 ; NO-SIMD128-NEXT: i32.xor $push1=, $16, $pop0 ; NO-SIMD128-NEXT: i32.and $push3=, $pop2, $pop1 ; NO-SIMD128-NEXT: i32.xor $push4=, $pop3, $48 -; NO-SIMD128-NEXT: i32.store8 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push11=, 14 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.xor $push8=, $31, $47 -; NO-SIMD128-NEXT: i32.const $push101=, -1 -; NO-SIMD128-NEXT: i32.xor $push7=, $15, $pop101 -; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.xor $push10=, $pop9, $47 -; NO-SIMD128-NEXT: i32.store8 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push17=, 13 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.xor $push14=, $30, $46 -; NO-SIMD128-NEXT: i32.const $push100=, -1 -; NO-SIMD128-NEXT: i32.xor $push13=, $14, $pop100 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop4 +; NO-SIMD128-NEXT: i32.xor $push6=, $31, $47 +; NO-SIMD128-NEXT: i32.const $push79=, -1 +; NO-SIMD128-NEXT: i32.xor $push5=, $15, 
$pop79 +; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $pop5 +; NO-SIMD128-NEXT: i32.xor $push8=, $pop7, $47 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop8 +; NO-SIMD128-NEXT: i32.xor $push10=, $30, $46 +; NO-SIMD128-NEXT: i32.const $push78=, -1 +; NO-SIMD128-NEXT: i32.xor $push9=, $14, $pop78 +; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $pop9 +; NO-SIMD128-NEXT: i32.xor $push12=, $pop11, $46 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop12 +; NO-SIMD128-NEXT: i32.xor $push14=, $29, $45 +; NO-SIMD128-NEXT: i32.const $push77=, -1 +; NO-SIMD128-NEXT: i32.xor $push13=, $13, $pop77 ; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.xor $push16=, $pop15, $46 -; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push23=, 12 -; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-NEXT: i32.xor $push20=, $29, $45 -; NO-SIMD128-NEXT: i32.const $push99=, -1 -; NO-SIMD128-NEXT: i32.xor $push19=, $13, $pop99 -; NO-SIMD128-NEXT: i32.and $push21=, $pop20, $pop19 -; NO-SIMD128-NEXT: i32.xor $push22=, $pop21, $45 -; NO-SIMD128-NEXT: i32.store8 0($pop24), $pop22 -; NO-SIMD128-NEXT: i32.const $push29=, 11 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.xor $push26=, $28, $44 -; NO-SIMD128-NEXT: i32.const $push98=, -1 -; NO-SIMD128-NEXT: i32.xor $push25=, $12, $pop98 +; NO-SIMD128-NEXT: i32.xor $push16=, $pop15, $45 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop16 +; NO-SIMD128-NEXT: i32.xor $push18=, $28, $44 +; NO-SIMD128-NEXT: i32.const $push76=, -1 +; NO-SIMD128-NEXT: i32.xor $push17=, $12, $pop76 +; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $pop17 +; NO-SIMD128-NEXT: i32.xor $push20=, $pop19, $44 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop20 +; NO-SIMD128-NEXT: i32.xor $push22=, $27, $43 +; NO-SIMD128-NEXT: i32.const $push75=, -1 +; NO-SIMD128-NEXT: i32.xor $push21=, $11, $pop75 +; NO-SIMD128-NEXT: i32.and $push23=, $pop22, $pop21 +; NO-SIMD128-NEXT: i32.xor $push24=, $pop23, $43 +; NO-SIMD128-NEXT: 
i32.store8 10($0), $pop24 +; NO-SIMD128-NEXT: i32.xor $push26=, $26, $42 +; NO-SIMD128-NEXT: i32.const $push74=, -1 +; NO-SIMD128-NEXT: i32.xor $push25=, $10, $pop74 ; NO-SIMD128-NEXT: i32.and $push27=, $pop26, $pop25 -; NO-SIMD128-NEXT: i32.xor $push28=, $pop27, $44 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push35=, 10 -; NO-SIMD128-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-NEXT: i32.xor $push32=, $27, $43 -; NO-SIMD128-NEXT: i32.const $push97=, -1 -; NO-SIMD128-NEXT: i32.xor $push31=, $11, $pop97 -; NO-SIMD128-NEXT: i32.and $push33=, $pop32, $pop31 -; NO-SIMD128-NEXT: i32.xor $push34=, $pop33, $43 -; NO-SIMD128-NEXT: i32.store8 0($pop36), $pop34 -; NO-SIMD128-NEXT: i32.const $push41=, 9 -; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-NEXT: i32.xor $push38=, $26, $42 -; NO-SIMD128-NEXT: i32.const $push96=, -1 -; NO-SIMD128-NEXT: i32.xor $push37=, $10, $pop96 +; NO-SIMD128-NEXT: i32.xor $push28=, $pop27, $42 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop28 +; NO-SIMD128-NEXT: i32.xor $push30=, $25, $41 +; NO-SIMD128-NEXT: i32.const $push73=, -1 +; NO-SIMD128-NEXT: i32.xor $push29=, $9, $pop73 +; NO-SIMD128-NEXT: i32.and $push31=, $pop30, $pop29 +; NO-SIMD128-NEXT: i32.xor $push32=, $pop31, $41 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop32 +; NO-SIMD128-NEXT: i32.xor $push34=, $24, $40 +; NO-SIMD128-NEXT: i32.const $push72=, -1 +; NO-SIMD128-NEXT: i32.xor $push33=, $8, $pop72 +; NO-SIMD128-NEXT: i32.and $push35=, $pop34, $pop33 +; NO-SIMD128-NEXT: i32.xor $push36=, $pop35, $40 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop36 +; NO-SIMD128-NEXT: i32.xor $push38=, $23, $39 +; NO-SIMD128-NEXT: i32.const $push71=, -1 +; NO-SIMD128-NEXT: i32.xor $push37=, $7, $pop71 ; NO-SIMD128-NEXT: i32.and $push39=, $pop38, $pop37 -; NO-SIMD128-NEXT: i32.xor $push40=, $pop39, $42 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-NEXT: i32.xor $push44=, $25, $41 -; NO-SIMD128-NEXT: i32.const $push95=, -1 -; 
NO-SIMD128-NEXT: i32.xor $push43=, $9, $pop95 -; NO-SIMD128-NEXT: i32.and $push45=, $pop44, $pop43 -; NO-SIMD128-NEXT: i32.xor $push46=, $pop45, $41 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop46 -; NO-SIMD128-NEXT: i32.const $push51=, 7 -; NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51 -; NO-SIMD128-NEXT: i32.xor $push48=, $24, $40 -; NO-SIMD128-NEXT: i32.const $push94=, -1 -; NO-SIMD128-NEXT: i32.xor $push47=, $8, $pop94 -; NO-SIMD128-NEXT: i32.and $push49=, $pop48, $pop47 -; NO-SIMD128-NEXT: i32.xor $push50=, $pop49, $40 -; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50 -; NO-SIMD128-NEXT: i32.const $push57=, 6 -; NO-SIMD128-NEXT: i32.add $push58=, $0, $pop57 -; NO-SIMD128-NEXT: i32.xor $push54=, $23, $39 -; NO-SIMD128-NEXT: i32.const $push93=, -1 -; NO-SIMD128-NEXT: i32.xor $push53=, $7, $pop93 +; NO-SIMD128-NEXT: i32.xor $push40=, $pop39, $39 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop40 +; NO-SIMD128-NEXT: i32.xor $push42=, $22, $38 +; NO-SIMD128-NEXT: i32.const $push70=, -1 +; NO-SIMD128-NEXT: i32.xor $push41=, $6, $pop70 +; NO-SIMD128-NEXT: i32.and $push43=, $pop42, $pop41 +; NO-SIMD128-NEXT: i32.xor $push44=, $pop43, $38 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop44 +; NO-SIMD128-NEXT: i32.xor $push46=, $21, $37 +; NO-SIMD128-NEXT: i32.const $push69=, -1 +; NO-SIMD128-NEXT: i32.xor $push45=, $5, $pop69 +; NO-SIMD128-NEXT: i32.and $push47=, $pop46, $pop45 +; NO-SIMD128-NEXT: i32.xor $push48=, $pop47, $37 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop48 +; NO-SIMD128-NEXT: i32.xor $push50=, $20, $36 +; NO-SIMD128-NEXT: i32.const $push68=, -1 +; NO-SIMD128-NEXT: i32.xor $push49=, $4, $pop68 +; NO-SIMD128-NEXT: i32.and $push51=, $pop50, $pop49 +; NO-SIMD128-NEXT: i32.xor $push52=, $pop51, $36 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop52 +; NO-SIMD128-NEXT: i32.xor $push54=, $19, $35 +; NO-SIMD128-NEXT: i32.const $push67=, -1 +; NO-SIMD128-NEXT: i32.xor $push53=, $3, $pop67 ; NO-SIMD128-NEXT: i32.and $push55=, $pop54, $pop53 -; NO-SIMD128-NEXT: i32.xor $push56=, $pop55, 
$39 -; NO-SIMD128-NEXT: i32.store8 0($pop58), $pop56 -; NO-SIMD128-NEXT: i32.const $push63=, 5 -; NO-SIMD128-NEXT: i32.add $push64=, $0, $pop63 -; NO-SIMD128-NEXT: i32.xor $push60=, $22, $38 -; NO-SIMD128-NEXT: i32.const $push92=, -1 -; NO-SIMD128-NEXT: i32.xor $push59=, $6, $pop92 -; NO-SIMD128-NEXT: i32.and $push61=, $pop60, $pop59 -; NO-SIMD128-NEXT: i32.xor $push62=, $pop61, $38 -; NO-SIMD128-NEXT: i32.store8 0($pop64), $pop62 -; NO-SIMD128-NEXT: i32.xor $push66=, $21, $37 -; NO-SIMD128-NEXT: i32.const $push91=, -1 -; NO-SIMD128-NEXT: i32.xor $push65=, $5, $pop91 -; NO-SIMD128-NEXT: i32.and $push67=, $pop66, $pop65 -; NO-SIMD128-NEXT: i32.xor $push68=, $pop67, $37 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop68 -; NO-SIMD128-NEXT: i32.const $push73=, 3 -; NO-SIMD128-NEXT: i32.add $push74=, $0, $pop73 -; NO-SIMD128-NEXT: i32.xor $push70=, $20, $36 -; NO-SIMD128-NEXT: i32.const $push90=, -1 -; NO-SIMD128-NEXT: i32.xor $push69=, $4, $pop90 -; NO-SIMD128-NEXT: i32.and $push71=, $pop70, $pop69 -; NO-SIMD128-NEXT: i32.xor $push72=, $pop71, $36 -; NO-SIMD128-NEXT: i32.store8 0($pop74), $pop72 -; NO-SIMD128-NEXT: i32.xor $push76=, $19, $35 -; NO-SIMD128-NEXT: i32.const $push89=, -1 -; NO-SIMD128-NEXT: i32.xor $push75=, $3, $pop89 -; NO-SIMD128-NEXT: i32.and $push77=, $pop76, $pop75 -; NO-SIMD128-NEXT: i32.xor $push78=, $pop77, $35 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop78 -; NO-SIMD128-NEXT: i32.xor $push80=, $18, $34 -; NO-SIMD128-NEXT: i32.const $push88=, -1 -; NO-SIMD128-NEXT: i32.xor $push79=, $2, $pop88 -; NO-SIMD128-NEXT: i32.and $push81=, $pop80, $pop79 -; NO-SIMD128-NEXT: i32.xor $push82=, $pop81, $34 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop82 -; NO-SIMD128-NEXT: i32.xor $push84=, $17, $33 -; NO-SIMD128-NEXT: i32.const $push87=, -1 -; NO-SIMD128-NEXT: i32.xor $push83=, $1, $pop87 -; NO-SIMD128-NEXT: i32.and $push85=, $pop84, $pop83 -; NO-SIMD128-NEXT: i32.xor $push86=, $pop85, $33 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop86 +; NO-SIMD128-NEXT: i32.xor 
$push56=, $pop55, $35 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop56 +; NO-SIMD128-NEXT: i32.xor $push58=, $18, $34 +; NO-SIMD128-NEXT: i32.const $push66=, -1 +; NO-SIMD128-NEXT: i32.xor $push57=, $2, $pop66 +; NO-SIMD128-NEXT: i32.and $push59=, $pop58, $pop57 +; NO-SIMD128-NEXT: i32.xor $push60=, $pop59, $34 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop60 +; NO-SIMD128-NEXT: i32.xor $push62=, $17, $33 +; NO-SIMD128-NEXT: i32.const $push65=, -1 +; NO-SIMD128-NEXT: i32.xor $push61=, $1, $pop65 +; NO-SIMD128-NEXT: i32.and $push63=, $pop62, $pop61 +; NO-SIMD128-NEXT: i32.xor $push64=, $pop63, $33 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop64 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: bitselect_xor_reversed_v16i8: @@ -6079,117 +4957,95 @@ define <16 x i8> @bitselect_xor_reversed_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 ; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $pop3, $33 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop4 ; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $18, $34 -; NO-SIMD128-FAST-NEXT: i32.const $push101=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop101 +; NO-SIMD128-FAST-NEXT: i32.const $push79=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop79 ; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $pop5 ; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $34 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop8 ; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $19, $35 -; NO-SIMD128-FAST-NEXT: i32.const $push100=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $3, $pop100 +; NO-SIMD128-FAST-NEXT: i32.const $push78=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $3, $pop78 ; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $pop9 ; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $pop11, $35 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 ; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $20, $36 -; NO-SIMD128-FAST-NEXT: i32.const $push99=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $4, 
$pop99 +; NO-SIMD128-FAST-NEXT: i32.const $push77=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $4, $pop77 ; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $pop13 ; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $pop15, $36 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $21, $37 -; NO-SIMD128-FAST-NEXT: i32.const $push98=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $5, $pop98 -; NO-SIMD128-FAST-NEXT: i32.and $push21=, $pop20, $pop19 -; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $pop21, $37 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $22, $38 -; NO-SIMD128-FAST-NEXT: i32.const $push97=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $6, $pop97 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $pop23 -; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $pop25, $38 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $23, $39 -; NO-SIMD128-FAST-NEXT: i32.const $push96=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $7, $pop96 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $21, $37 +; NO-SIMD128-FAST-NEXT: i32.const $push76=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $5, $pop76 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $pop18, $pop17 +; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $pop19, $37 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $22, $38 +; NO-SIMD128-FAST-NEXT: i32.const $push75=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $6, $pop75 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $pop22, $pop21 +; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $pop23, $38 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $23, 
$39 +; NO-SIMD128-FAST-NEXT: i32.const $push74=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push25=, $7, $pop74 +; NO-SIMD128-FAST-NEXT: i32.and $push27=, $pop26, $pop25 +; NO-SIMD128-FAST-NEXT: i32.xor $push28=, $pop27, $39 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $24, $40 +; NO-SIMD128-FAST-NEXT: i32.const $push73=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $8, $pop73 ; NO-SIMD128-FAST-NEXT: i32.and $push31=, $pop30, $pop29 -; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $pop31, $39 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $24, $40 -; NO-SIMD128-FAST-NEXT: i32.const $push95=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push35=, $8, $pop95 -; NO-SIMD128-FAST-NEXT: i32.and $push37=, $pop36, $pop35 -; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $pop37, $40 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop38 -; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $25, $41 -; NO-SIMD128-FAST-NEXT: i32.const $push94=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push41=, $9, $pop94 +; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $pop31, $40 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop32 +; NO-SIMD128-FAST-NEXT: i32.xor $push34=, $25, $41 +; NO-SIMD128-FAST-NEXT: i32.const $push72=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push33=, $9, $pop72 +; NO-SIMD128-FAST-NEXT: i32.and $push35=, $pop34, $pop33 +; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $pop35, $41 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop36 +; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $26, $42 +; NO-SIMD128-FAST-NEXT: i32.const $push71=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push37=, $10, $pop71 +; NO-SIMD128-FAST-NEXT: i32.and $push39=, $pop38, $pop37 +; NO-SIMD128-FAST-NEXT: i32.xor $push40=, $pop39, $42 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop40 +; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $27, $43 +; NO-SIMD128-FAST-NEXT: i32.const 
$push70=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push41=, $11, $pop70 ; NO-SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $pop41 -; NO-SIMD128-FAST-NEXT: i32.xor $push44=, $pop43, $41 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-FAST-NEXT: i32.xor $push46=, $26, $42 -; NO-SIMD128-FAST-NEXT: i32.const $push93=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push45=, $10, $pop93 +; NO-SIMD128-FAST-NEXT: i32.xor $push44=, $pop43, $43 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop44 +; NO-SIMD128-FAST-NEXT: i32.xor $push46=, $28, $44 +; NO-SIMD128-FAST-NEXT: i32.const $push69=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push45=, $12, $pop69 ; NO-SIMD128-FAST-NEXT: i32.and $push47=, $pop46, $pop45 -; NO-SIMD128-FAST-NEXT: i32.xor $push48=, $pop47, $42 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push56=, $0, $pop55 -; NO-SIMD128-FAST-NEXT: i32.xor $push52=, $27, $43 -; NO-SIMD128-FAST-NEXT: i32.const $push92=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push51=, $11, $pop92 -; NO-SIMD128-FAST-NEXT: i32.and $push53=, $pop52, $pop51 -; NO-SIMD128-FAST-NEXT: i32.xor $push54=, $pop53, $43 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop56), $pop54 -; NO-SIMD128-FAST-NEXT: i32.const $push61=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push62=, $0, $pop61 -; NO-SIMD128-FAST-NEXT: i32.xor $push58=, $28, $44 -; NO-SIMD128-FAST-NEXT: i32.const $push91=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push57=, $12, $pop91 +; NO-SIMD128-FAST-NEXT: i32.xor $push48=, $pop47, $44 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop48 +; NO-SIMD128-FAST-NEXT: i32.xor $push50=, $29, $45 +; NO-SIMD128-FAST-NEXT: i32.const $push68=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push49=, $13, $pop68 +; NO-SIMD128-FAST-NEXT: i32.and $push51=, $pop50, $pop49 +; NO-SIMD128-FAST-NEXT: i32.xor $push52=, $pop51, $45 +; NO-SIMD128-FAST-NEXT: 
i32.store8 12($0), $pop52 +; NO-SIMD128-FAST-NEXT: i32.xor $push54=, $30, $46 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push53=, $14, $pop67 +; NO-SIMD128-FAST-NEXT: i32.and $push55=, $pop54, $pop53 +; NO-SIMD128-FAST-NEXT: i32.xor $push56=, $pop55, $46 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop56 +; NO-SIMD128-FAST-NEXT: i32.xor $push58=, $31, $47 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push57=, $15, $pop66 ; NO-SIMD128-FAST-NEXT: i32.and $push59=, $pop58, $pop57 -; NO-SIMD128-FAST-NEXT: i32.xor $push60=, $pop59, $44 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop62), $pop60 -; NO-SIMD128-FAST-NEXT: i32.const $push67=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push68=, $0, $pop67 -; NO-SIMD128-FAST-NEXT: i32.xor $push64=, $29, $45 -; NO-SIMD128-FAST-NEXT: i32.const $push90=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push63=, $13, $pop90 -; NO-SIMD128-FAST-NEXT: i32.and $push65=, $pop64, $pop63 -; NO-SIMD128-FAST-NEXT: i32.xor $push66=, $pop65, $45 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop68), $pop66 -; NO-SIMD128-FAST-NEXT: i32.const $push73=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push74=, $0, $pop73 -; NO-SIMD128-FAST-NEXT: i32.xor $push70=, $30, $46 -; NO-SIMD128-FAST-NEXT: i32.const $push89=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push69=, $14, $pop89 -; NO-SIMD128-FAST-NEXT: i32.and $push71=, $pop70, $pop69 -; NO-SIMD128-FAST-NEXT: i32.xor $push72=, $pop71, $46 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop74), $pop72 -; NO-SIMD128-FAST-NEXT: i32.const $push79=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push80=, $0, $pop79 -; NO-SIMD128-FAST-NEXT: i32.xor $push76=, $31, $47 -; NO-SIMD128-FAST-NEXT: i32.const $push88=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push75=, $15, $pop88 -; NO-SIMD128-FAST-NEXT: i32.and $push77=, $pop76, $pop75 -; NO-SIMD128-FAST-NEXT: i32.xor $push78=, $pop77, $47 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop80), $pop78 -; NO-SIMD128-FAST-NEXT: i32.const $push85=, 15 -; 
NO-SIMD128-FAST-NEXT: i32.add $push86=, $0, $pop85 -; NO-SIMD128-FAST-NEXT: i32.xor $push82=, $32, $48 -; NO-SIMD128-FAST-NEXT: i32.const $push87=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push81=, $16, $pop87 -; NO-SIMD128-FAST-NEXT: i32.and $push83=, $pop82, $pop81 -; NO-SIMD128-FAST-NEXT: i32.xor $push84=, $pop83, $48 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop86), $pop84 +; NO-SIMD128-FAST-NEXT: i32.xor $push60=, $pop59, $47 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop60 +; NO-SIMD128-FAST-NEXT: i32.xor $push62=, $32, $48 +; NO-SIMD128-FAST-NEXT: i32.const $push65=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push61=, $16, $pop65 +; NO-SIMD128-FAST-NEXT: i32.and $push63=, $pop62, $pop61 +; NO-SIMD128-FAST-NEXT: i32.xor $push64=, $pop63, $48 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop64 ; NO-SIMD128-FAST-NEXT: return %xor1 = xor <16 x i8> %v1, %v2 %notc = xor <16 x i8> %c, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, @@ -6218,30 +5074,22 @@ define <8 x i16> @add_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: add_v8i16: ; NO-SIMD128: .functype add_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.add $push0=, $5, $13 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop0 -; NO-SIMD128-NEXT: i32.add $push1=, $3, $11 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop1 -; NO-SIMD128-NEXT: i32.add $push2=, $2, $10 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop2 -; NO-SIMD128-NEXT: i32.add $push3=, $1, $9 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: i32.add $push4=, $8, $16 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.add $push7=, $7, $15 -; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 -; NO-SIMD128-NEXT: i32.const $push11=, 10 -; NO-SIMD128-NEXT: 
i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.add $push10=, $6, $14 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push14=, 6 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.add $push13=, $4, $12 -; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13 +; NO-SIMD128-NEXT: i32.add $push0=, $8, $16 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop0 +; NO-SIMD128-NEXT: i32.add $push1=, $7, $15 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop1 +; NO-SIMD128-NEXT: i32.add $push2=, $6, $14 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop2 +; NO-SIMD128-NEXT: i32.add $push3=, $5, $13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 +; NO-SIMD128-NEXT: i32.add $push4=, $4, $12 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop4 +; NO-SIMD128-NEXT: i32.add $push5=, $3, $11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop5 +; NO-SIMD128-NEXT: i32.add $push6=, $2, $10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-NEXT: i32.add $push7=, $1, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: add_v8i16: @@ -6253,24 +5101,16 @@ define <8 x i16> @add_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.add $push2=, $3, $11 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $4, $12 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.add $push6=, $5, $13 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $6, $14 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.add $push12=, $7, $15 -; 
NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $8, $16 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.add $push3=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.add $push6=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.add $push7=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %a = add <8 x i16> %x, %y ret <8 x i16> %a @@ -6292,30 +5132,22 @@ define <8 x i16> @sub_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: sub_v8i16: ; NO-SIMD128: .functype sub_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.sub $push0=, $5, $13 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop0 -; NO-SIMD128-NEXT: i32.sub $push1=, $3, $11 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop1 -; NO-SIMD128-NEXT: i32.sub $push2=, $2, $10 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop2 -; NO-SIMD128-NEXT: i32.sub $push3=, $1, $9 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: i32.sub $push4=, $8, $16 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.sub $push7=, $7, $15 -; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 -; NO-SIMD128-NEXT: i32.const $push11=, 10 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.sub $push10=, $6, $14 -; NO-SIMD128-NEXT: i32.store16 
0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push14=, 6 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.sub $push13=, $4, $12 -; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13 +; NO-SIMD128-NEXT: i32.sub $push0=, $8, $16 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop0 +; NO-SIMD128-NEXT: i32.sub $push1=, $7, $15 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop1 +; NO-SIMD128-NEXT: i32.sub $push2=, $6, $14 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop2 +; NO-SIMD128-NEXT: i32.sub $push3=, $5, $13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 +; NO-SIMD128-NEXT: i32.sub $push4=, $4, $12 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop4 +; NO-SIMD128-NEXT: i32.sub $push5=, $3, $11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop5 +; NO-SIMD128-NEXT: i32.sub $push6=, $2, $10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-NEXT: i32.sub $push7=, $1, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: sub_v8i16: @@ -6327,24 +5159,16 @@ define <8 x i16> @sub_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $3, $11 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $4, $12 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $5, $13 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $6, $14 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $7, $15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 -; 
NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.sub $push15=, $8, $16 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.sub $push4=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.sub $push7=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %a = sub <8 x i16> %x, %y ret <8 x i16> %a @@ -6366,30 +5190,22 @@ define <8 x i16> @mul_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: mul_v8i16: ; NO-SIMD128: .functype mul_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.mul $push0=, $5, $13 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop0 -; NO-SIMD128-NEXT: i32.mul $push1=, $3, $11 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop1 -; NO-SIMD128-NEXT: i32.mul $push2=, $2, $10 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop2 -; NO-SIMD128-NEXT: i32.mul $push3=, $1, $9 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: i32.mul $push4=, $8, $16 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.mul $push7=, $7, $15 -; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 -; NO-SIMD128-NEXT: i32.const $push11=, 10 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.mul $push10=, $6, $14 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push14=, 6 -; NO-SIMD128-NEXT: i32.add $push15=, $0, 
$pop14 -; NO-SIMD128-NEXT: i32.mul $push13=, $4, $12 -; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13 +; NO-SIMD128-NEXT: i32.mul $push0=, $8, $16 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop0 +; NO-SIMD128-NEXT: i32.mul $push1=, $7, $15 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop1 +; NO-SIMD128-NEXT: i32.mul $push2=, $6, $14 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop2 +; NO-SIMD128-NEXT: i32.mul $push3=, $5, $13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 +; NO-SIMD128-NEXT: i32.mul $push4=, $4, $12 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop4 +; NO-SIMD128-NEXT: i32.mul $push5=, $3, $11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop5 +; NO-SIMD128-NEXT: i32.mul $push6=, $2, $10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-NEXT: i32.mul $push7=, $1, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: mul_v8i16: @@ -6401,24 +5217,16 @@ define <8 x i16> @mul_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.mul $push2=, $3, $11 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $4, $12 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $5, $13 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $6, $14 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $7, $15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.mul $push15=, $8, $16 -; 
NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.mul $push4=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.mul $push7=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %a = mul <8 x i16> %x, %y ret <8 x i16> %a @@ -6440,54 +5248,46 @@ define <8 x i16> @min_s_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: min_s_v8i16: ; NO-SIMD128: .functype min_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push4=, 14 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 ; NO-SIMD128-NEXT: i32.extend16_s $push1=, $8 ; NO-SIMD128-NEXT: i32.extend16_s $push0=, $16 ; NO-SIMD128-NEXT: i32.lt_s $push2=, $pop1, $pop0 ; NO-SIMD128-NEXT: i32.select $push3=, $8, $16, $pop2 -; NO-SIMD128-NEXT: i32.store16 0($pop5), $pop3 -; NO-SIMD128-NEXT: i32.const $push10=, 12 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.extend16_s $push7=, $7 -; NO-SIMD128-NEXT: i32.extend16_s $push6=, $15 -; NO-SIMD128-NEXT: i32.lt_s $push8=, $pop7, $pop6 -; NO-SIMD128-NEXT: i32.select $push9=, $7, $15, $pop8 -; NO-SIMD128-NEXT: i32.store16 0($pop11), $pop9 -; NO-SIMD128-NEXT: i32.const $push16=, 10 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.extend16_s $push13=, $6 -; NO-SIMD128-NEXT: i32.extend16_s $push12=, $14 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop3 +; NO-SIMD128-NEXT: i32.extend16_s $push5=, $7 +; NO-SIMD128-NEXT: i32.extend16_s $push4=, $15 +; NO-SIMD128-NEXT: i32.lt_s $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.select $push7=, 
$7, $15, $pop6 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop7 +; NO-SIMD128-NEXT: i32.extend16_s $push9=, $6 +; NO-SIMD128-NEXT: i32.extend16_s $push8=, $14 +; NO-SIMD128-NEXT: i32.lt_s $push10=, $pop9, $pop8 +; NO-SIMD128-NEXT: i32.select $push11=, $6, $14, $pop10 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop11 +; NO-SIMD128-NEXT: i32.extend16_s $push13=, $5 +; NO-SIMD128-NEXT: i32.extend16_s $push12=, $13 ; NO-SIMD128-NEXT: i32.lt_s $push14=, $pop13, $pop12 -; NO-SIMD128-NEXT: i32.select $push15=, $6, $14, $pop14 -; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.extend16_s $push19=, $5 -; NO-SIMD128-NEXT: i32.extend16_s $push18=, $13 -; NO-SIMD128-NEXT: i32.lt_s $push20=, $pop19, $pop18 -; NO-SIMD128-NEXT: i32.select $push21=, $5, $13, $pop20 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop21 -; NO-SIMD128-NEXT: i32.const $push26=, 6 -; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-NEXT: i32.extend16_s $push23=, $4 -; NO-SIMD128-NEXT: i32.extend16_s $push22=, $12 -; NO-SIMD128-NEXT: i32.lt_s $push24=, $pop23, $pop22 -; NO-SIMD128-NEXT: i32.select $push25=, $4, $12, $pop24 -; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-NEXT: i32.extend16_s $push29=, $3 -; NO-SIMD128-NEXT: i32.extend16_s $push28=, $11 +; NO-SIMD128-NEXT: i32.select $push15=, $5, $13, $pop14 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop15 +; NO-SIMD128-NEXT: i32.extend16_s $push17=, $4 +; NO-SIMD128-NEXT: i32.extend16_s $push16=, $12 +; NO-SIMD128-NEXT: i32.lt_s $push18=, $pop17, $pop16 +; NO-SIMD128-NEXT: i32.select $push19=, $4, $12, $pop18 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop19 +; NO-SIMD128-NEXT: i32.extend16_s $push21=, $3 +; NO-SIMD128-NEXT: i32.extend16_s $push20=, $11 +; NO-SIMD128-NEXT: i32.lt_s $push22=, $pop21, $pop20 +; NO-SIMD128-NEXT: i32.select $push23=, $3, $11, $pop22 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop23 +; NO-SIMD128-NEXT: i32.extend16_s $push25=, $2 +; NO-SIMD128-NEXT: i32.extend16_s $push24=, $10 +; NO-SIMD128-NEXT: 
i32.lt_s $push26=, $pop25, $pop24 +; NO-SIMD128-NEXT: i32.select $push27=, $2, $10, $pop26 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop27 +; NO-SIMD128-NEXT: i32.extend16_s $push29=, $1 +; NO-SIMD128-NEXT: i32.extend16_s $push28=, $9 ; NO-SIMD128-NEXT: i32.lt_s $push30=, $pop29, $pop28 -; NO-SIMD128-NEXT: i32.select $push31=, $3, $11, $pop30 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop31 -; NO-SIMD128-NEXT: i32.extend16_s $push33=, $2 -; NO-SIMD128-NEXT: i32.extend16_s $push32=, $10 -; NO-SIMD128-NEXT: i32.lt_s $push34=, $pop33, $pop32 -; NO-SIMD128-NEXT: i32.select $push35=, $2, $10, $pop34 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop35 -; NO-SIMD128-NEXT: i32.extend16_s $push37=, $1 -; NO-SIMD128-NEXT: i32.extend16_s $push36=, $9 -; NO-SIMD128-NEXT: i32.lt_s $push38=, $pop37, $pop36 -; NO-SIMD128-NEXT: i32.select $push39=, $1, $9, $pop38 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop39 +; NO-SIMD128-NEXT: i32.select $push31=, $1, $9, $pop30 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop31 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: min_s_v8i16: @@ -6508,39 +5308,31 @@ define <8 x i16> @min_s_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.lt_s $push10=, $pop9, $pop8 ; NO-SIMD128-FAST-NEXT: i32.select $push11=, $3, $11, $pop10 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push16=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push13=, $4 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push12=, $12 ; NO-SIMD128-FAST-NEXT: i32.lt_s $push14=, $pop13, $pop12 ; NO-SIMD128-FAST-NEXT: i32.select $push15=, $4, $12, $pop14 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop17), $pop15 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push19=, $5 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push18=, $13 -; NO-SIMD128-FAST-NEXT: i32.lt_s $push20=, $pop19, $pop18 -; NO-SIMD128-FAST-NEXT: i32.select $push21=, $5, $13, $pop20 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop21 -; NO-SIMD128-FAST-NEXT: 
i32.const $push26=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push23=, $6 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push22=, $14 -; NO-SIMD128-FAST-NEXT: i32.lt_s $push24=, $pop23, $pop22 -; NO-SIMD128-FAST-NEXT: i32.select $push25=, $6, $14, $pop24 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push29=, $7 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push28=, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push17=, $5 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push16=, $13 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push18=, $pop17, $pop16 +; NO-SIMD128-FAST-NEXT: i32.select $push19=, $5, $13, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop19 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push21=, $6 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push20=, $14 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push22=, $pop21, $pop20 +; NO-SIMD128-FAST-NEXT: i32.select $push23=, $6, $14, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop23 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push25=, $7 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push24=, $15 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push26=, $pop25, $pop24 +; NO-SIMD128-FAST-NEXT: i32.select $push27=, $7, $15, $pop26 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop27 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push29=, $8 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push28=, $16 ; NO-SIMD128-FAST-NEXT: i32.lt_s $push30=, $pop29, $pop28 -; NO-SIMD128-FAST-NEXT: i32.select $push31=, $7, $15, $pop30 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop33), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push38=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push39=, $0, $pop38 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push35=, $8 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push34=, $16 -; NO-SIMD128-FAST-NEXT: i32.lt_s $push36=, 
$pop35, $pop34 -; NO-SIMD128-FAST-NEXT: i32.select $push37=, $8, $16, $pop36 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop39), $pop37 +; NO-SIMD128-FAST-NEXT: i32.select $push31=, $8, $16, $pop30 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop31 ; NO-SIMD128-FAST-NEXT: return %c = icmp slt <8 x i16> %x, %y %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y @@ -6563,70 +5355,62 @@ define <8 x i16> @min_u_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: min_u_v8i16: ; NO-SIMD128: .functype min_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.const $push0=, 65535 ; NO-SIMD128-NEXT: i32.and $push2=, $8, $pop0 -; NO-SIMD128-NEXT: i32.const $push55=, 65535 -; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop55 +; NO-SIMD128-NEXT: i32.const $push47=, 65535 +; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop47 ; NO-SIMD128-NEXT: i32.lt_u $push3=, $pop2, $pop1 ; NO-SIMD128-NEXT: i32.select $push4=, $8, $16, $pop3 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push11=, 12 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.const $push54=, 65535 -; NO-SIMD128-NEXT: i32.and $push8=, $7, $pop54 -; NO-SIMD128-NEXT: i32.const $push53=, 65535 -; NO-SIMD128-NEXT: i32.and $push7=, $15, $pop53 -; NO-SIMD128-NEXT: i32.lt_u $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.select $push10=, $7, $15, $pop9 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push17=, 10 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.const $push52=, 65535 -; NO-SIMD128-NEXT: i32.and $push14=, $6, $pop52 -; NO-SIMD128-NEXT: i32.const $push51=, 65535 -; NO-SIMD128-NEXT: i32.and $push13=, $14, $pop51 -; NO-SIMD128-NEXT: i32.lt_u $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.select $push16=, $6, $14, $pop15 -; 
NO-SIMD128-NEXT: i32.store16 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push50=, 65535 -; NO-SIMD128-NEXT: i32.and $push20=, $5, $pop50 -; NO-SIMD128-NEXT: i32.const $push49=, 65535 -; NO-SIMD128-NEXT: i32.and $push19=, $13, $pop49 -; NO-SIMD128-NEXT: i32.lt_u $push21=, $pop20, $pop19 -; NO-SIMD128-NEXT: i32.select $push22=, $5, $13, $pop21 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop22 -; NO-SIMD128-NEXT: i32.const $push27=, 6 -; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-NEXT: i32.const $push48=, 65535 -; NO-SIMD128-NEXT: i32.and $push24=, $4, $pop48 -; NO-SIMD128-NEXT: i32.const $push47=, 65535 -; NO-SIMD128-NEXT: i32.and $push23=, $12, $pop47 -; NO-SIMD128-NEXT: i32.lt_u $push25=, $pop24, $pop23 -; NO-SIMD128-NEXT: i32.select $push26=, $4, $12, $pop25 -; NO-SIMD128-NEXT: i32.store16 0($pop28), $pop26 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop4 ; NO-SIMD128-NEXT: i32.const $push46=, 65535 -; NO-SIMD128-NEXT: i32.and $push30=, $3, $pop46 +; NO-SIMD128-NEXT: i32.and $push6=, $7, $pop46 ; NO-SIMD128-NEXT: i32.const $push45=, 65535 -; NO-SIMD128-NEXT: i32.and $push29=, $11, $pop45 -; NO-SIMD128-NEXT: i32.lt_u $push31=, $pop30, $pop29 -; NO-SIMD128-NEXT: i32.select $push32=, $3, $11, $pop31 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop32 +; NO-SIMD128-NEXT: i32.and $push5=, $15, $pop45 +; NO-SIMD128-NEXT: i32.lt_u $push7=, $pop6, $pop5 +; NO-SIMD128-NEXT: i32.select $push8=, $7, $15, $pop7 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop8 ; NO-SIMD128-NEXT: i32.const $push44=, 65535 -; NO-SIMD128-NEXT: i32.and $push34=, $2, $pop44 +; NO-SIMD128-NEXT: i32.and $push10=, $6, $pop44 ; NO-SIMD128-NEXT: i32.const $push43=, 65535 -; NO-SIMD128-NEXT: i32.and $push33=, $10, $pop43 -; NO-SIMD128-NEXT: i32.lt_u $push35=, $pop34, $pop33 -; NO-SIMD128-NEXT: i32.select $push36=, $2, $10, $pop35 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop36 +; NO-SIMD128-NEXT: i32.and $push9=, $14, $pop43 +; NO-SIMD128-NEXT: i32.lt_u $push11=, $pop10, $pop9 +; NO-SIMD128-NEXT: 
i32.select $push12=, $6, $14, $pop11 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop12 ; NO-SIMD128-NEXT: i32.const $push42=, 65535 -; NO-SIMD128-NEXT: i32.and $push38=, $1, $pop42 +; NO-SIMD128-NEXT: i32.and $push14=, $5, $pop42 ; NO-SIMD128-NEXT: i32.const $push41=, 65535 -; NO-SIMD128-NEXT: i32.and $push37=, $9, $pop41 -; NO-SIMD128-NEXT: i32.lt_u $push39=, $pop38, $pop37 -; NO-SIMD128-NEXT: i32.select $push40=, $1, $9, $pop39 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop40 +; NO-SIMD128-NEXT: i32.and $push13=, $13, $pop41 +; NO-SIMD128-NEXT: i32.lt_u $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.select $push16=, $5, $13, $pop15 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop16 +; NO-SIMD128-NEXT: i32.const $push40=, 65535 +; NO-SIMD128-NEXT: i32.and $push18=, $4, $pop40 +; NO-SIMD128-NEXT: i32.const $push39=, 65535 +; NO-SIMD128-NEXT: i32.and $push17=, $12, $pop39 +; NO-SIMD128-NEXT: i32.lt_u $push19=, $pop18, $pop17 +; NO-SIMD128-NEXT: i32.select $push20=, $4, $12, $pop19 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop20 +; NO-SIMD128-NEXT: i32.const $push38=, 65535 +; NO-SIMD128-NEXT: i32.and $push22=, $3, $pop38 +; NO-SIMD128-NEXT: i32.const $push37=, 65535 +; NO-SIMD128-NEXT: i32.and $push21=, $11, $pop37 +; NO-SIMD128-NEXT: i32.lt_u $push23=, $pop22, $pop21 +; NO-SIMD128-NEXT: i32.select $push24=, $3, $11, $pop23 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop24 +; NO-SIMD128-NEXT: i32.const $push36=, 65535 +; NO-SIMD128-NEXT: i32.and $push26=, $2, $pop36 +; NO-SIMD128-NEXT: i32.const $push35=, 65535 +; NO-SIMD128-NEXT: i32.and $push25=, $10, $pop35 +; NO-SIMD128-NEXT: i32.lt_u $push27=, $pop26, $pop25 +; NO-SIMD128-NEXT: i32.select $push28=, $2, $10, $pop27 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop28 +; NO-SIMD128-NEXT: i32.const $push34=, 65535 +; NO-SIMD128-NEXT: i32.and $push30=, $1, $pop34 +; NO-SIMD128-NEXT: i32.const $push33=, 65535 +; NO-SIMD128-NEXT: i32.and $push29=, $9, $pop33 +; NO-SIMD128-NEXT: i32.lt_u $push31=, $pop30, $pop29 +; NO-SIMD128-NEXT: 
i32.select $push32=, $1, $9, $pop31 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop32 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: min_u_v8i16: @@ -6634,68 +5418,60 @@ define <8 x i16> @min_u_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop55 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop47 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.select $push4=, $1, $9, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push54=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop54 -; NO-SIMD128-FAST-NEXT: i32.const $push53=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $pop53 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop46 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $pop45 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push7=, $pop6, $pop5 ; NO-SIMD128-FAST-NEXT: i32.select $push8=, $2, $10, $pop7 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push52=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop52 -; NO-SIMD128-FAST-NEXT: i32.const $push51=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push9=, $11, $pop51 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $11, $pop43 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push11=, $pop10, $pop9 ; NO-SIMD128-FAST-NEXT: i32.select $push12=, $3, $11, $pop11 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, 
$pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push50=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop50 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push13=, $12, $pop49 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop42 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $12, $pop41 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push15=, $pop14, $pop13 ; NO-SIMD128-FAST-NEXT: i32.select $push16=, $4, $12, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push48=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $5, $pop48 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $13, $pop47 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push21=, $pop20, $pop19 -; NO-SIMD128-FAST-NEXT: i32.select $push22=, $5, $13, $pop21 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push24=, $6, $pop46 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $14, $pop45 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push25=, $pop24, $pop23 -; NO-SIMD128-FAST-NEXT: i32.select $push26=, $6, $14, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push30=, $7, $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $15, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $5, $pop40 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 65535 +; 
NO-SIMD128-FAST-NEXT: i32.and $push17=, $13, $pop39 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push19=, $pop18, $pop17 +; NO-SIMD128-FAST-NEXT: i32.select $push20=, $5, $13, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $6, $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $14, $pop37 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push23=, $pop22, $pop21 +; NO-SIMD128-FAST-NEXT: i32.select $push24=, $6, $14, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push26=, $7, $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $15, $pop35 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push27=, $pop26, $pop25 +; NO-SIMD128-FAST-NEXT: i32.select $push28=, $7, $15, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push30=, $8, $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $16, $pop33 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push31=, $pop30, $pop29 -; NO-SIMD128-FAST-NEXT: i32.select $push32=, $7, $15, $pop31 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop34), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.const $push42=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push36=, $8, $pop42 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push35=, $16, $pop41 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push37=, $pop36, $pop35 -; NO-SIMD128-FAST-NEXT: i32.select $push38=, $8, $16, $pop37 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop40), $pop38 +; NO-SIMD128-FAST-NEXT: i32.select $push32=, $8, $16, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop32 ; 
NO-SIMD128-FAST-NEXT: return %c = icmp ult <8 x i16> %x, %y %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y @@ -6718,54 +5494,46 @@ define <8 x i16> @max_s_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: max_s_v8i16: ; NO-SIMD128: .functype max_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push4=, 14 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 ; NO-SIMD128-NEXT: i32.extend16_s $push1=, $8 ; NO-SIMD128-NEXT: i32.extend16_s $push0=, $16 ; NO-SIMD128-NEXT: i32.gt_s $push2=, $pop1, $pop0 ; NO-SIMD128-NEXT: i32.select $push3=, $8, $16, $pop2 -; NO-SIMD128-NEXT: i32.store16 0($pop5), $pop3 -; NO-SIMD128-NEXT: i32.const $push10=, 12 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.extend16_s $push7=, $7 -; NO-SIMD128-NEXT: i32.extend16_s $push6=, $15 -; NO-SIMD128-NEXT: i32.gt_s $push8=, $pop7, $pop6 -; NO-SIMD128-NEXT: i32.select $push9=, $7, $15, $pop8 -; NO-SIMD128-NEXT: i32.store16 0($pop11), $pop9 -; NO-SIMD128-NEXT: i32.const $push16=, 10 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.extend16_s $push13=, $6 -; NO-SIMD128-NEXT: i32.extend16_s $push12=, $14 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop3 +; NO-SIMD128-NEXT: i32.extend16_s $push5=, $7 +; NO-SIMD128-NEXT: i32.extend16_s $push4=, $15 +; NO-SIMD128-NEXT: i32.gt_s $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.select $push7=, $7, $15, $pop6 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop7 +; NO-SIMD128-NEXT: i32.extend16_s $push9=, $6 +; NO-SIMD128-NEXT: i32.extend16_s $push8=, $14 +; NO-SIMD128-NEXT: i32.gt_s $push10=, $pop9, $pop8 +; NO-SIMD128-NEXT: i32.select $push11=, $6, $14, $pop10 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop11 +; NO-SIMD128-NEXT: i32.extend16_s $push13=, $5 +; NO-SIMD128-NEXT: i32.extend16_s $push12=, $13 ; NO-SIMD128-NEXT: i32.gt_s $push14=, $pop13, $pop12 -; NO-SIMD128-NEXT: i32.select $push15=, $6, $14, 
$pop14 -; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.extend16_s $push19=, $5 -; NO-SIMD128-NEXT: i32.extend16_s $push18=, $13 -; NO-SIMD128-NEXT: i32.gt_s $push20=, $pop19, $pop18 -; NO-SIMD128-NEXT: i32.select $push21=, $5, $13, $pop20 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop21 -; NO-SIMD128-NEXT: i32.const $push26=, 6 -; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-NEXT: i32.extend16_s $push23=, $4 -; NO-SIMD128-NEXT: i32.extend16_s $push22=, $12 -; NO-SIMD128-NEXT: i32.gt_s $push24=, $pop23, $pop22 -; NO-SIMD128-NEXT: i32.select $push25=, $4, $12, $pop24 -; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-NEXT: i32.extend16_s $push29=, $3 -; NO-SIMD128-NEXT: i32.extend16_s $push28=, $11 +; NO-SIMD128-NEXT: i32.select $push15=, $5, $13, $pop14 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop15 +; NO-SIMD128-NEXT: i32.extend16_s $push17=, $4 +; NO-SIMD128-NEXT: i32.extend16_s $push16=, $12 +; NO-SIMD128-NEXT: i32.gt_s $push18=, $pop17, $pop16 +; NO-SIMD128-NEXT: i32.select $push19=, $4, $12, $pop18 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop19 +; NO-SIMD128-NEXT: i32.extend16_s $push21=, $3 +; NO-SIMD128-NEXT: i32.extend16_s $push20=, $11 +; NO-SIMD128-NEXT: i32.gt_s $push22=, $pop21, $pop20 +; NO-SIMD128-NEXT: i32.select $push23=, $3, $11, $pop22 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop23 +; NO-SIMD128-NEXT: i32.extend16_s $push25=, $2 +; NO-SIMD128-NEXT: i32.extend16_s $push24=, $10 +; NO-SIMD128-NEXT: i32.gt_s $push26=, $pop25, $pop24 +; NO-SIMD128-NEXT: i32.select $push27=, $2, $10, $pop26 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop27 +; NO-SIMD128-NEXT: i32.extend16_s $push29=, $1 +; NO-SIMD128-NEXT: i32.extend16_s $push28=, $9 ; NO-SIMD128-NEXT: i32.gt_s $push30=, $pop29, $pop28 -; NO-SIMD128-NEXT: i32.select $push31=, $3, $11, $pop30 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop31 -; NO-SIMD128-NEXT: i32.extend16_s $push33=, $2 -; NO-SIMD128-NEXT: i32.extend16_s $push32=, $10 -; NO-SIMD128-NEXT: i32.gt_s 
$push34=, $pop33, $pop32 -; NO-SIMD128-NEXT: i32.select $push35=, $2, $10, $pop34 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop35 -; NO-SIMD128-NEXT: i32.extend16_s $push37=, $1 -; NO-SIMD128-NEXT: i32.extend16_s $push36=, $9 -; NO-SIMD128-NEXT: i32.gt_s $push38=, $pop37, $pop36 -; NO-SIMD128-NEXT: i32.select $push39=, $1, $9, $pop38 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop39 +; NO-SIMD128-NEXT: i32.select $push31=, $1, $9, $pop30 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop31 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_s_v8i16: @@ -6786,39 +5554,31 @@ define <8 x i16> @max_s_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.gt_s $push10=, $pop9, $pop8 ; NO-SIMD128-FAST-NEXT: i32.select $push11=, $3, $11, $pop10 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push16=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push13=, $4 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push12=, $12 ; NO-SIMD128-FAST-NEXT: i32.gt_s $push14=, $pop13, $pop12 ; NO-SIMD128-FAST-NEXT: i32.select $push15=, $4, $12, $pop14 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop17), $pop15 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push19=, $5 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push18=, $13 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push20=, $pop19, $pop18 -; NO-SIMD128-FAST-NEXT: i32.select $push21=, $5, $13, $pop20 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push23=, $6 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push22=, $14 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push24=, $pop23, $pop22 -; NO-SIMD128-FAST-NEXT: i32.select $push25=, $6, $14, $pop24 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push29=, $7 
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push28=, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push17=, $5 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push16=, $13 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push18=, $pop17, $pop16 +; NO-SIMD128-FAST-NEXT: i32.select $push19=, $5, $13, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop19 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push21=, $6 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push20=, $14 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push22=, $pop21, $pop20 +; NO-SIMD128-FAST-NEXT: i32.select $push23=, $6, $14, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop23 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push25=, $7 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push24=, $15 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push26=, $pop25, $pop24 +; NO-SIMD128-FAST-NEXT: i32.select $push27=, $7, $15, $pop26 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop27 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push29=, $8 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push28=, $16 ; NO-SIMD128-FAST-NEXT: i32.gt_s $push30=, $pop29, $pop28 -; NO-SIMD128-FAST-NEXT: i32.select $push31=, $7, $15, $pop30 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop33), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push38=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push39=, $0, $pop38 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push35=, $8 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push34=, $16 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push36=, $pop35, $pop34 -; NO-SIMD128-FAST-NEXT: i32.select $push37=, $8, $16, $pop36 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop39), $pop37 +; NO-SIMD128-FAST-NEXT: i32.select $push31=, $8, $16, $pop30 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop31 ; NO-SIMD128-FAST-NEXT: return %c = icmp sgt <8 x i16> %x, %y %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y @@ -6841,70 +5601,62 @@ define <8 x i16> @max_u_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: max_u_v8i16: ; NO-SIMD128: .functype max_u_v8i16 (i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.const $push0=, 65535 ; NO-SIMD128-NEXT: i32.and $push2=, $8, $pop0 -; NO-SIMD128-NEXT: i32.const $push55=, 65535 -; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop55 +; NO-SIMD128-NEXT: i32.const $push47=, 65535 +; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop47 ; NO-SIMD128-NEXT: i32.gt_u $push3=, $pop2, $pop1 ; NO-SIMD128-NEXT: i32.select $push4=, $8, $16, $pop3 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push11=, 12 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.const $push54=, 65535 -; NO-SIMD128-NEXT: i32.and $push8=, $7, $pop54 -; NO-SIMD128-NEXT: i32.const $push53=, 65535 -; NO-SIMD128-NEXT: i32.and $push7=, $15, $pop53 -; NO-SIMD128-NEXT: i32.gt_u $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.select $push10=, $7, $15, $pop9 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push17=, 10 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.const $push52=, 65535 -; NO-SIMD128-NEXT: i32.and $push14=, $6, $pop52 -; NO-SIMD128-NEXT: i32.const $push51=, 65535 -; NO-SIMD128-NEXT: i32.and $push13=, $14, $pop51 -; NO-SIMD128-NEXT: i32.gt_u $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.select $push16=, $6, $14, $pop15 -; NO-SIMD128-NEXT: i32.store16 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push50=, 65535 -; NO-SIMD128-NEXT: i32.and $push20=, $5, $pop50 -; NO-SIMD128-NEXT: i32.const $push49=, 65535 -; NO-SIMD128-NEXT: i32.and $push19=, $13, $pop49 -; NO-SIMD128-NEXT: i32.gt_u $push21=, $pop20, $pop19 -; NO-SIMD128-NEXT: i32.select $push22=, $5, $13, $pop21 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop22 -; NO-SIMD128-NEXT: i32.const $push27=, 6 -; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-NEXT: i32.const $push48=, 
65535 -; NO-SIMD128-NEXT: i32.and $push24=, $4, $pop48 -; NO-SIMD128-NEXT: i32.const $push47=, 65535 -; NO-SIMD128-NEXT: i32.and $push23=, $12, $pop47 -; NO-SIMD128-NEXT: i32.gt_u $push25=, $pop24, $pop23 -; NO-SIMD128-NEXT: i32.select $push26=, $4, $12, $pop25 -; NO-SIMD128-NEXT: i32.store16 0($pop28), $pop26 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop4 ; NO-SIMD128-NEXT: i32.const $push46=, 65535 -; NO-SIMD128-NEXT: i32.and $push30=, $3, $pop46 +; NO-SIMD128-NEXT: i32.and $push6=, $7, $pop46 ; NO-SIMD128-NEXT: i32.const $push45=, 65535 -; NO-SIMD128-NEXT: i32.and $push29=, $11, $pop45 -; NO-SIMD128-NEXT: i32.gt_u $push31=, $pop30, $pop29 -; NO-SIMD128-NEXT: i32.select $push32=, $3, $11, $pop31 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop32 +; NO-SIMD128-NEXT: i32.and $push5=, $15, $pop45 +; NO-SIMD128-NEXT: i32.gt_u $push7=, $pop6, $pop5 +; NO-SIMD128-NEXT: i32.select $push8=, $7, $15, $pop7 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop8 ; NO-SIMD128-NEXT: i32.const $push44=, 65535 -; NO-SIMD128-NEXT: i32.and $push34=, $2, $pop44 +; NO-SIMD128-NEXT: i32.and $push10=, $6, $pop44 ; NO-SIMD128-NEXT: i32.const $push43=, 65535 -; NO-SIMD128-NEXT: i32.and $push33=, $10, $pop43 -; NO-SIMD128-NEXT: i32.gt_u $push35=, $pop34, $pop33 -; NO-SIMD128-NEXT: i32.select $push36=, $2, $10, $pop35 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop36 +; NO-SIMD128-NEXT: i32.and $push9=, $14, $pop43 +; NO-SIMD128-NEXT: i32.gt_u $push11=, $pop10, $pop9 +; NO-SIMD128-NEXT: i32.select $push12=, $6, $14, $pop11 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop12 ; NO-SIMD128-NEXT: i32.const $push42=, 65535 -; NO-SIMD128-NEXT: i32.and $push38=, $1, $pop42 +; NO-SIMD128-NEXT: i32.and $push14=, $5, $pop42 ; NO-SIMD128-NEXT: i32.const $push41=, 65535 -; NO-SIMD128-NEXT: i32.and $push37=, $9, $pop41 -; NO-SIMD128-NEXT: i32.gt_u $push39=, $pop38, $pop37 -; NO-SIMD128-NEXT: i32.select $push40=, $1, $9, $pop39 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop40 +; NO-SIMD128-NEXT: i32.and $push13=, $13, $pop41 +; 
NO-SIMD128-NEXT: i32.gt_u $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.select $push16=, $5, $13, $pop15 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop16 +; NO-SIMD128-NEXT: i32.const $push40=, 65535 +; NO-SIMD128-NEXT: i32.and $push18=, $4, $pop40 +; NO-SIMD128-NEXT: i32.const $push39=, 65535 +; NO-SIMD128-NEXT: i32.and $push17=, $12, $pop39 +; NO-SIMD128-NEXT: i32.gt_u $push19=, $pop18, $pop17 +; NO-SIMD128-NEXT: i32.select $push20=, $4, $12, $pop19 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop20 +; NO-SIMD128-NEXT: i32.const $push38=, 65535 +; NO-SIMD128-NEXT: i32.and $push22=, $3, $pop38 +; NO-SIMD128-NEXT: i32.const $push37=, 65535 +; NO-SIMD128-NEXT: i32.and $push21=, $11, $pop37 +; NO-SIMD128-NEXT: i32.gt_u $push23=, $pop22, $pop21 +; NO-SIMD128-NEXT: i32.select $push24=, $3, $11, $pop23 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop24 +; NO-SIMD128-NEXT: i32.const $push36=, 65535 +; NO-SIMD128-NEXT: i32.and $push26=, $2, $pop36 +; NO-SIMD128-NEXT: i32.const $push35=, 65535 +; NO-SIMD128-NEXT: i32.and $push25=, $10, $pop35 +; NO-SIMD128-NEXT: i32.gt_u $push27=, $pop26, $pop25 +; NO-SIMD128-NEXT: i32.select $push28=, $2, $10, $pop27 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop28 +; NO-SIMD128-NEXT: i32.const $push34=, 65535 +; NO-SIMD128-NEXT: i32.and $push30=, $1, $pop34 +; NO-SIMD128-NEXT: i32.const $push33=, 65535 +; NO-SIMD128-NEXT: i32.and $push29=, $9, $pop33 +; NO-SIMD128-NEXT: i32.gt_u $push31=, $pop30, $pop29 +; NO-SIMD128-NEXT: i32.select $push32=, $1, $9, $pop31 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop32 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_u_v8i16: @@ -6912,68 +5664,60 @@ define <8 x i16> @max_u_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop55 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65535 +; 
NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop47 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.select $push4=, $1, $9, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push54=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop54 -; NO-SIMD128-FAST-NEXT: i32.const $push53=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $pop53 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop46 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $pop45 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push7=, $pop6, $pop5 ; NO-SIMD128-FAST-NEXT: i32.select $push8=, $2, $10, $pop7 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push52=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop52 -; NO-SIMD128-FAST-NEXT: i32.const $push51=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push9=, $11, $pop51 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $11, $pop43 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push11=, $pop10, $pop9 ; NO-SIMD128-FAST-NEXT: i32.select $push12=, $3, $11, $pop11 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push50=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop50 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push13=, $12, $pop49 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop42 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $12, $pop41 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push15=, $pop14, $pop13 ; NO-SIMD128-FAST-NEXT: i32.select 
$push16=, $4, $12, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push48=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $5, $pop48 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $13, $pop47 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push21=, $pop20, $pop19 -; NO-SIMD128-FAST-NEXT: i32.select $push22=, $5, $13, $pop21 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push24=, $6, $pop46 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $14, $pop45 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push25=, $pop24, $pop23 -; NO-SIMD128-FAST-NEXT: i32.select $push26=, $6, $14, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push30=, $7, $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $15, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $5, $pop40 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $13, $pop39 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push19=, $pop18, $pop17 +; NO-SIMD128-FAST-NEXT: i32.select $push20=, $5, $13, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $6, $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $14, $pop37 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push23=, $pop22, $pop21 +; NO-SIMD128-FAST-NEXT: 
i32.select $push24=, $6, $14, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push26=, $7, $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $15, $pop35 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push27=, $pop26, $pop25 +; NO-SIMD128-FAST-NEXT: i32.select $push28=, $7, $15, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push30=, $8, $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $16, $pop33 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push31=, $pop30, $pop29 -; NO-SIMD128-FAST-NEXT: i32.select $push32=, $7, $15, $pop31 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop34), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.const $push42=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push36=, $8, $pop42 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push35=, $16, $pop41 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push37=, $pop36, $pop35 -; NO-SIMD128-FAST-NEXT: i32.select $push38=, $8, $16, $pop37 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop40), $pop38 +; NO-SIMD128-FAST-NEXT: i32.select $push32=, $8, $16, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop32 ; NO-SIMD128-FAST-NEXT: return %c = icmp ugt <8 x i16> %x, %y %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y @@ -6996,78 +5740,70 @@ define <8 x i16> @avgr_u_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: avgr_u_v8i16: ; NO-SIMD128: .functype avgr_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push0=, 14 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.add $push2=, $8, $16 -; 
NO-SIMD128-NEXT: i32.const $push3=, 1 -; NO-SIMD128-NEXT: i32.add $push4=, $pop2, $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 65534 -; NO-SIMD128-NEXT: i32.and $push6=, $pop4, $pop5 -; NO-SIMD128-NEXT: i32.const $push63=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push7=, $pop6, $pop63 -; NO-SIMD128-NEXT: i32.store16 0($pop1), $pop7 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.add $push10=, $7, $15 -; NO-SIMD128-NEXT: i32.const $push62=, 1 -; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop62 -; NO-SIMD128-NEXT: i32.const $push61=, 65534 -; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop61 -; NO-SIMD128-NEXT: i32.const $push60=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop60 -; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop13 -; NO-SIMD128-NEXT: i32.const $push14=, 10 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.add $push16=, $6, $14 -; NO-SIMD128-NEXT: i32.const $push59=, 1 -; NO-SIMD128-NEXT: i32.add $push17=, $pop16, $pop59 -; NO-SIMD128-NEXT: i32.const $push58=, 65534 -; NO-SIMD128-NEXT: i32.and $push18=, $pop17, $pop58 -; NO-SIMD128-NEXT: i32.const $push57=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push19=, $pop18, $pop57 -; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop19 -; NO-SIMD128-NEXT: i32.add $push20=, $5, $13 -; NO-SIMD128-NEXT: i32.const $push56=, 1 -; NO-SIMD128-NEXT: i32.add $push21=, $pop20, $pop56 -; NO-SIMD128-NEXT: i32.const $push55=, 65534 -; NO-SIMD128-NEXT: i32.and $push22=, $pop21, $pop55 +; NO-SIMD128-NEXT: i32.add $push0=, $8, $16 +; NO-SIMD128-NEXT: i32.const $push1=, 1 +; NO-SIMD128-NEXT: i32.add $push2=, $pop0, $pop1 +; NO-SIMD128-NEXT: i32.const $push3=, 65534 +; NO-SIMD128-NEXT: i32.and $push4=, $pop2, $pop3 +; NO-SIMD128-NEXT: i32.const $push55=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push5=, $pop4, $pop55 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop5 +; NO-SIMD128-NEXT: i32.add $push6=, $7, $15 ; NO-SIMD128-NEXT: i32.const $push54=, 1 -; NO-SIMD128-NEXT: 
i32.shr_u $push23=, $pop22, $pop54 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop23 -; NO-SIMD128-NEXT: i32.const $push24=, 6 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.add $push26=, $4, $12 -; NO-SIMD128-NEXT: i32.const $push53=, 1 -; NO-SIMD128-NEXT: i32.add $push27=, $pop26, $pop53 -; NO-SIMD128-NEXT: i32.const $push52=, 65534 -; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $pop52 +; NO-SIMD128-NEXT: i32.add $push7=, $pop6, $pop54 +; NO-SIMD128-NEXT: i32.const $push53=, 65534 +; NO-SIMD128-NEXT: i32.and $push8=, $pop7, $pop53 +; NO-SIMD128-NEXT: i32.const $push52=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop52 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop9 +; NO-SIMD128-NEXT: i32.add $push10=, $6, $14 ; NO-SIMD128-NEXT: i32.const $push51=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push29=, $pop28, $pop51 -; NO-SIMD128-NEXT: i32.store16 0($pop25), $pop29 -; NO-SIMD128-NEXT: i32.add $push30=, $3, $11 -; NO-SIMD128-NEXT: i32.const $push50=, 1 -; NO-SIMD128-NEXT: i32.add $push31=, $pop30, $pop50 -; NO-SIMD128-NEXT: i32.const $push49=, 65534 -; NO-SIMD128-NEXT: i32.and $push32=, $pop31, $pop49 +; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop51 +; NO-SIMD128-NEXT: i32.const $push50=, 65534 +; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop50 +; NO-SIMD128-NEXT: i32.const $push49=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop49 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop13 +; NO-SIMD128-NEXT: i32.add $push14=, $5, $13 ; NO-SIMD128-NEXT: i32.const $push48=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop48 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop33 -; NO-SIMD128-NEXT: i32.add $push34=, $2, $10 -; NO-SIMD128-NEXT: i32.const $push47=, 1 -; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop47 -; NO-SIMD128-NEXT: i32.const $push46=, 65534 -; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $pop46 +; NO-SIMD128-NEXT: i32.add $push15=, $pop14, $pop48 +; NO-SIMD128-NEXT: i32.const $push47=, 65534 +; NO-SIMD128-NEXT: i32.and $push16=, 
$pop15, $pop47 +; NO-SIMD128-NEXT: i32.const $push46=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push17=, $pop16, $pop46 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop17 +; NO-SIMD128-NEXT: i32.add $push18=, $4, $12 ; NO-SIMD128-NEXT: i32.const $push45=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push37=, $pop36, $pop45 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop37 -; NO-SIMD128-NEXT: i32.add $push38=, $1, $9 -; NO-SIMD128-NEXT: i32.const $push44=, 1 -; NO-SIMD128-NEXT: i32.add $push39=, $pop38, $pop44 -; NO-SIMD128-NEXT: i32.const $push43=, 65534 -; NO-SIMD128-NEXT: i32.and $push40=, $pop39, $pop43 +; NO-SIMD128-NEXT: i32.add $push19=, $pop18, $pop45 +; NO-SIMD128-NEXT: i32.const $push44=, 65534 +; NO-SIMD128-NEXT: i32.and $push20=, $pop19, $pop44 +; NO-SIMD128-NEXT: i32.const $push43=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push21=, $pop20, $pop43 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop21 +; NO-SIMD128-NEXT: i32.add $push22=, $3, $11 ; NO-SIMD128-NEXT: i32.const $push42=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push41=, $pop40, $pop42 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop41 +; NO-SIMD128-NEXT: i32.add $push23=, $pop22, $pop42 +; NO-SIMD128-NEXT: i32.const $push41=, 65534 +; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $pop41 +; NO-SIMD128-NEXT: i32.const $push40=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop40 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop25 +; NO-SIMD128-NEXT: i32.add $push26=, $2, $10 +; NO-SIMD128-NEXT: i32.const $push39=, 1 +; NO-SIMD128-NEXT: i32.add $push27=, $pop26, $pop39 +; NO-SIMD128-NEXT: i32.const $push38=, 65534 +; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $pop38 +; NO-SIMD128-NEXT: i32.const $push37=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push29=, $pop28, $pop37 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop29 +; NO-SIMD128-NEXT: i32.add $push30=, $1, $9 +; NO-SIMD128-NEXT: i32.const $push36=, 1 +; NO-SIMD128-NEXT: i32.add $push31=, $pop30, $pop36 +; NO-SIMD128-NEXT: i32.const $push35=, 65534 +; NO-SIMD128-NEXT: i32.and $push32=, $pop31, $pop35 +; 
NO-SIMD128-NEXT: i32.const $push34=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop34 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop33 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: avgr_u_v8i16: @@ -7078,73 +5814,65 @@ define <8 x i16> @avgr_u_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.add $push2=, $pop0, $pop1 ; NO-SIMD128-FAST-NEXT: i32.const $push3=, 65534 ; NO-SIMD128-FAST-NEXT: i32.and $push4=, $pop2, $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push63=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop63 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop55 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop5 ; NO-SIMD128-FAST-NEXT: i32.add $push6=, $2, $10 -; NO-SIMD128-FAST-NEXT: i32.const $push62=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop62 -; NO-SIMD128-FAST-NEXT: i32.const $push61=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop61 -; NO-SIMD128-FAST-NEXT: i32.const $push60=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop60 +; NO-SIMD128-FAST-NEXT: i32.const $push54=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop54 +; NO-SIMD128-FAST-NEXT: i32.const $push53=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop53 +; NO-SIMD128-FAST-NEXT: i32.const $push52=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop52 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop9 ; NO-SIMD128-FAST-NEXT: i32.add $push10=, $3, $11 -; NO-SIMD128-FAST-NEXT: i32.const $push59=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop59 -; NO-SIMD128-FAST-NEXT: i32.const $push58=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop58 -; NO-SIMD128-FAST-NEXT: i32.const $push57=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop57 -; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push14=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-FAST-NEXT: i32.add 
$push16=, $4, $12 -; NO-SIMD128-FAST-NEXT: i32.const $push56=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push17=, $pop16, $pop56 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push18=, $pop17, $pop55 -; NO-SIMD128-FAST-NEXT: i32.const $push54=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push19=, $pop18, $pop54 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop19 -; NO-SIMD128-FAST-NEXT: i32.add $push20=, $5, $13 -; NO-SIMD128-FAST-NEXT: i32.const $push53=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push21=, $pop20, $pop53 -; NO-SIMD128-FAST-NEXT: i32.const $push52=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $pop52 ; NO-SIMD128-FAST-NEXT: i32.const $push51=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push23=, $pop22, $pop51 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop23 -; NO-SIMD128-FAST-NEXT: i32.const $push24=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-FAST-NEXT: i32.add $push26=, $6, $14 -; NO-SIMD128-FAST-NEXT: i32.const $push50=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop50 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop49 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop51 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop50 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop49 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $4, $12 ; NO-SIMD128-FAST-NEXT: i32.const $push48=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop48 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop25), $pop29 -; NO-SIMD128-FAST-NEXT: i32.const $push30=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $7, $15 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $pop32, $pop47 -; 
NO-SIMD128-FAST-NEXT: i32.const $push46=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push34=, $pop33, $pop46 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $pop14, $pop48 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $pop15, $pop47 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop46 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $5, $13 ; NO-SIMD128-FAST-NEXT: i32.const $push45=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push35=, $pop34, $pop45 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop31), $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push36=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-FAST-NEXT: i32.add $push38=, $8, $16 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push39=, $pop38, $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $pop43 +; NO-SIMD128-FAST-NEXT: i32.add $push19=, $pop18, $pop45 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $pop19, $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push21=, $pop20, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $6, $14 ; NO-SIMD128-FAST-NEXT: i32.const $push42=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop42 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop37), $pop41 +; NO-SIMD128-FAST-NEXT: i32.add $push23=, $pop22, $pop42 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push24=, $pop23, $pop41 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop40 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop25 +; NO-SIMD128-FAST-NEXT: i32.add $push26=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 1 +; NO-SIMD128-FAST-NEXT: 
i32.add $push27=, $pop26, $pop39 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop37 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop29 +; NO-SIMD128-FAST-NEXT: i32.add $push30=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push31=, $pop30, $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push32=, $pop31, $pop35 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push33=, $pop32, $pop34 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop33 ; NO-SIMD128-FAST-NEXT: return %a = add nuw <8 x i16> %x, %y %b = add nuw <8 x i16> %a, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> @@ -7176,78 +5904,70 @@ define <8 x i16> @avgr_u_v8i16_wrap(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: avgr_u_v8i16_wrap: ; NO-SIMD128: .functype avgr_u_v8i16_wrap (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push0=, 14 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.add $push2=, $8, $16 -; NO-SIMD128-NEXT: i32.const $push3=, 1 -; NO-SIMD128-NEXT: i32.add $push4=, $pop2, $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 65534 -; NO-SIMD128-NEXT: i32.and $push6=, $pop4, $pop5 -; NO-SIMD128-NEXT: i32.const $push63=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push7=, $pop6, $pop63 -; NO-SIMD128-NEXT: i32.store16 0($pop1), $pop7 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.add $push10=, $7, $15 -; NO-SIMD128-NEXT: i32.const $push62=, 1 -; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop62 -; NO-SIMD128-NEXT: i32.const $push61=, 65534 -; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop61 -; NO-SIMD128-NEXT: i32.const $push60=, 1 -; 
NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop60 -; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop13 -; NO-SIMD128-NEXT: i32.const $push14=, 10 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.add $push16=, $6, $14 -; NO-SIMD128-NEXT: i32.const $push59=, 1 -; NO-SIMD128-NEXT: i32.add $push17=, $pop16, $pop59 -; NO-SIMD128-NEXT: i32.const $push58=, 65534 -; NO-SIMD128-NEXT: i32.and $push18=, $pop17, $pop58 -; NO-SIMD128-NEXT: i32.const $push57=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push19=, $pop18, $pop57 -; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop19 -; NO-SIMD128-NEXT: i32.add $push20=, $5, $13 -; NO-SIMD128-NEXT: i32.const $push56=, 1 -; NO-SIMD128-NEXT: i32.add $push21=, $pop20, $pop56 -; NO-SIMD128-NEXT: i32.const $push55=, 65534 -; NO-SIMD128-NEXT: i32.and $push22=, $pop21, $pop55 +; NO-SIMD128-NEXT: i32.add $push0=, $8, $16 +; NO-SIMD128-NEXT: i32.const $push1=, 1 +; NO-SIMD128-NEXT: i32.add $push2=, $pop0, $pop1 +; NO-SIMD128-NEXT: i32.const $push3=, 65534 +; NO-SIMD128-NEXT: i32.and $push4=, $pop2, $pop3 +; NO-SIMD128-NEXT: i32.const $push55=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push5=, $pop4, $pop55 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop5 +; NO-SIMD128-NEXT: i32.add $push6=, $7, $15 ; NO-SIMD128-NEXT: i32.const $push54=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push23=, $pop22, $pop54 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop23 -; NO-SIMD128-NEXT: i32.const $push24=, 6 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.add $push26=, $4, $12 -; NO-SIMD128-NEXT: i32.const $push53=, 1 -; NO-SIMD128-NEXT: i32.add $push27=, $pop26, $pop53 -; NO-SIMD128-NEXT: i32.const $push52=, 65534 -; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $pop52 +; NO-SIMD128-NEXT: i32.add $push7=, $pop6, $pop54 +; NO-SIMD128-NEXT: i32.const $push53=, 65534 +; NO-SIMD128-NEXT: i32.and $push8=, $pop7, $pop53 +; NO-SIMD128-NEXT: i32.const $push52=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop52 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop9 
+; NO-SIMD128-NEXT: i32.add $push10=, $6, $14 ; NO-SIMD128-NEXT: i32.const $push51=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push29=, $pop28, $pop51 -; NO-SIMD128-NEXT: i32.store16 0($pop25), $pop29 -; NO-SIMD128-NEXT: i32.add $push30=, $3, $11 -; NO-SIMD128-NEXT: i32.const $push50=, 1 -; NO-SIMD128-NEXT: i32.add $push31=, $pop30, $pop50 -; NO-SIMD128-NEXT: i32.const $push49=, 65534 -; NO-SIMD128-NEXT: i32.and $push32=, $pop31, $pop49 +; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop51 +; NO-SIMD128-NEXT: i32.const $push50=, 65534 +; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop50 +; NO-SIMD128-NEXT: i32.const $push49=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop49 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop13 +; NO-SIMD128-NEXT: i32.add $push14=, $5, $13 ; NO-SIMD128-NEXT: i32.const $push48=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop48 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop33 -; NO-SIMD128-NEXT: i32.add $push34=, $2, $10 -; NO-SIMD128-NEXT: i32.const $push47=, 1 -; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop47 -; NO-SIMD128-NEXT: i32.const $push46=, 65534 -; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $pop46 +; NO-SIMD128-NEXT: i32.add $push15=, $pop14, $pop48 +; NO-SIMD128-NEXT: i32.const $push47=, 65534 +; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $pop47 +; NO-SIMD128-NEXT: i32.const $push46=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push17=, $pop16, $pop46 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop17 +; NO-SIMD128-NEXT: i32.add $push18=, $4, $12 ; NO-SIMD128-NEXT: i32.const $push45=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push37=, $pop36, $pop45 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop37 -; NO-SIMD128-NEXT: i32.add $push38=, $1, $9 -; NO-SIMD128-NEXT: i32.const $push44=, 1 -; NO-SIMD128-NEXT: i32.add $push39=, $pop38, $pop44 -; NO-SIMD128-NEXT: i32.const $push43=, 65534 -; NO-SIMD128-NEXT: i32.and $push40=, $pop39, $pop43 +; NO-SIMD128-NEXT: i32.add $push19=, $pop18, $pop45 +; NO-SIMD128-NEXT: i32.const $push44=, 65534 +; NO-SIMD128-NEXT: 
i32.and $push20=, $pop19, $pop44 +; NO-SIMD128-NEXT: i32.const $push43=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push21=, $pop20, $pop43 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop21 +; NO-SIMD128-NEXT: i32.add $push22=, $3, $11 ; NO-SIMD128-NEXT: i32.const $push42=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push41=, $pop40, $pop42 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop41 +; NO-SIMD128-NEXT: i32.add $push23=, $pop22, $pop42 +; NO-SIMD128-NEXT: i32.const $push41=, 65534 +; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $pop41 +; NO-SIMD128-NEXT: i32.const $push40=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop40 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop25 +; NO-SIMD128-NEXT: i32.add $push26=, $2, $10 +; NO-SIMD128-NEXT: i32.const $push39=, 1 +; NO-SIMD128-NEXT: i32.add $push27=, $pop26, $pop39 +; NO-SIMD128-NEXT: i32.const $push38=, 65534 +; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $pop38 +; NO-SIMD128-NEXT: i32.const $push37=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push29=, $pop28, $pop37 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop29 +; NO-SIMD128-NEXT: i32.add $push30=, $1, $9 +; NO-SIMD128-NEXT: i32.const $push36=, 1 +; NO-SIMD128-NEXT: i32.add $push31=, $pop30, $pop36 +; NO-SIMD128-NEXT: i32.const $push35=, 65534 +; NO-SIMD128-NEXT: i32.and $push32=, $pop31, $pop35 +; NO-SIMD128-NEXT: i32.const $push34=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop34 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop33 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: avgr_u_v8i16_wrap: @@ -7258,73 +5978,65 @@ define <8 x i16> @avgr_u_v8i16_wrap(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.add $push2=, $pop0, $pop1 ; NO-SIMD128-FAST-NEXT: i32.const $push3=, 65534 ; NO-SIMD128-FAST-NEXT: i32.and $push4=, $pop2, $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push63=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop63 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop55 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), 
$pop5 ; NO-SIMD128-FAST-NEXT: i32.add $push6=, $2, $10 -; NO-SIMD128-FAST-NEXT: i32.const $push62=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop62 -; NO-SIMD128-FAST-NEXT: i32.const $push61=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop61 -; NO-SIMD128-FAST-NEXT: i32.const $push60=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop60 +; NO-SIMD128-FAST-NEXT: i32.const $push54=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop54 +; NO-SIMD128-FAST-NEXT: i32.const $push53=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop53 +; NO-SIMD128-FAST-NEXT: i32.const $push52=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop52 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop9 ; NO-SIMD128-FAST-NEXT: i32.add $push10=, $3, $11 -; NO-SIMD128-FAST-NEXT: i32.const $push59=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop59 -; NO-SIMD128-FAST-NEXT: i32.const $push58=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop58 -; NO-SIMD128-FAST-NEXT: i32.const $push57=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop57 -; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push14=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-FAST-NEXT: i32.add $push16=, $4, $12 -; NO-SIMD128-FAST-NEXT: i32.const $push56=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push17=, $pop16, $pop56 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push18=, $pop17, $pop55 -; NO-SIMD128-FAST-NEXT: i32.const $push54=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push19=, $pop18, $pop54 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop19 -; NO-SIMD128-FAST-NEXT: i32.add $push20=, $5, $13 -; NO-SIMD128-FAST-NEXT: i32.const $push53=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push21=, $pop20, $pop53 -; NO-SIMD128-FAST-NEXT: i32.const $push52=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $pop52 ; NO-SIMD128-FAST-NEXT: i32.const $push51=, 1 -; 
NO-SIMD128-FAST-NEXT: i32.shr_u $push23=, $pop22, $pop51 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop23 -; NO-SIMD128-FAST-NEXT: i32.const $push24=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-FAST-NEXT: i32.add $push26=, $6, $14 -; NO-SIMD128-FAST-NEXT: i32.const $push50=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop50 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop49 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop51 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop50 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop49 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $4, $12 ; NO-SIMD128-FAST-NEXT: i32.const $push48=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop48 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop25), $pop29 -; NO-SIMD128-FAST-NEXT: i32.const $push30=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $7, $15 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $pop32, $pop47 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push34=, $pop33, $pop46 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $pop14, $pop48 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $pop15, $pop47 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop46 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $5, $13 ; NO-SIMD128-FAST-NEXT: i32.const $push45=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push35=, $pop34, $pop45 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop31), $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push36=, 14 -; NO-SIMD128-FAST-NEXT: i32.add 
$push37=, $0, $pop36 -; NO-SIMD128-FAST-NEXT: i32.add $push38=, $8, $16 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push39=, $pop38, $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $pop43 +; NO-SIMD128-FAST-NEXT: i32.add $push19=, $pop18, $pop45 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $pop19, $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push21=, $pop20, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $6, $14 ; NO-SIMD128-FAST-NEXT: i32.const $push42=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop42 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop37), $pop41 +; NO-SIMD128-FAST-NEXT: i32.add $push23=, $pop22, $pop42 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push24=, $pop23, $pop41 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop40 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop25 +; NO-SIMD128-FAST-NEXT: i32.add $push26=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop39 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop37 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop29 +; NO-SIMD128-FAST-NEXT: i32.add $push30=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push31=, $pop30, $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push32=, $pop31, $pop35 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push33=, $pop32, $pop34 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop33 ; 
NO-SIMD128-FAST-NEXT: return %a = add <8 x i16> %x, %y %b = add <8 x i16> %a, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> @@ -7348,70 +6060,62 @@ define <8 x i16> @abs_v8i16(<8 x i16> %x) { ; NO-SIMD128-LABEL: abs_v8i16: ; NO-SIMD128: .functype abs_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push4=, 14 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 ; NO-SIMD128-NEXT: i32.extend16_s $push0=, $8 ; NO-SIMD128-NEXT: i32.const $push1=, 15 -; NO-SIMD128-NEXT: i32.shr_s $push55=, $pop0, $pop1 -; NO-SIMD128-NEXT: local.tee $push54=, $9=, $pop55 -; NO-SIMD128-NEXT: i32.xor $push2=, $8, $pop54 +; NO-SIMD128-NEXT: i32.shr_s $push47=, $pop0, $pop1 +; NO-SIMD128-NEXT: local.tee $push46=, $9=, $pop47 +; NO-SIMD128-NEXT: i32.xor $push2=, $8, $pop46 ; NO-SIMD128-NEXT: i32.sub $push3=, $pop2, $9 -; NO-SIMD128-NEXT: i32.store16 0($pop5), $pop3 -; NO-SIMD128-NEXT: i32.const $push9=, 12 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.extend16_s $push6=, $7 -; NO-SIMD128-NEXT: i32.const $push53=, 15 -; NO-SIMD128-NEXT: i32.shr_s $push52=, $pop6, $pop53 -; NO-SIMD128-NEXT: local.tee $push51=, $8=, $pop52 -; NO-SIMD128-NEXT: i32.xor $push7=, $7, $pop51 -; NO-SIMD128-NEXT: i32.sub $push8=, $pop7, $8 -; NO-SIMD128-NEXT: i32.store16 0($pop10), $pop8 -; NO-SIMD128-NEXT: i32.const $push14=, 10 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.extend16_s $push11=, $6 -; NO-SIMD128-NEXT: i32.const $push50=, 15 -; NO-SIMD128-NEXT: i32.shr_s $push49=, $pop11, $pop50 -; NO-SIMD128-NEXT: local.tee $push48=, $8=, $pop49 -; NO-SIMD128-NEXT: i32.xor $push12=, $6, $pop48 -; NO-SIMD128-NEXT: i32.sub $push13=, $pop12, $8 -; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13 -; NO-SIMD128-NEXT: i32.extend16_s $push16=, $5 -; NO-SIMD128-NEXT: i32.const $push47=, 15 -; NO-SIMD128-NEXT: i32.shr_s $push46=, $pop16, $pop47 -; NO-SIMD128-NEXT: local.tee $push45=, $8=, $pop46 -; 
NO-SIMD128-NEXT: i32.xor $push17=, $5, $pop45 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop3 +; NO-SIMD128-NEXT: i32.extend16_s $push4=, $7 +; NO-SIMD128-NEXT: i32.const $push45=, 15 +; NO-SIMD128-NEXT: i32.shr_s $push44=, $pop4, $pop45 +; NO-SIMD128-NEXT: local.tee $push43=, $8=, $pop44 +; NO-SIMD128-NEXT: i32.xor $push5=, $7, $pop43 +; NO-SIMD128-NEXT: i32.sub $push6=, $pop5, $8 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop6 +; NO-SIMD128-NEXT: i32.extend16_s $push7=, $6 +; NO-SIMD128-NEXT: i32.const $push42=, 15 +; NO-SIMD128-NEXT: i32.shr_s $push41=, $pop7, $pop42 +; NO-SIMD128-NEXT: local.tee $push40=, $8=, $pop41 +; NO-SIMD128-NEXT: i32.xor $push8=, $6, $pop40 +; NO-SIMD128-NEXT: i32.sub $push9=, $pop8, $8 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop9 +; NO-SIMD128-NEXT: i32.extend16_s $push10=, $5 +; NO-SIMD128-NEXT: i32.const $push39=, 15 +; NO-SIMD128-NEXT: i32.shr_s $push38=, $pop10, $pop39 +; NO-SIMD128-NEXT: local.tee $push37=, $8=, $pop38 +; NO-SIMD128-NEXT: i32.xor $push11=, $5, $pop37 +; NO-SIMD128-NEXT: i32.sub $push12=, $pop11, $8 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop12 +; NO-SIMD128-NEXT: i32.extend16_s $push13=, $4 +; NO-SIMD128-NEXT: i32.const $push36=, 15 +; NO-SIMD128-NEXT: i32.shr_s $push35=, $pop13, $pop36 +; NO-SIMD128-NEXT: local.tee $push34=, $8=, $pop35 +; NO-SIMD128-NEXT: i32.xor $push14=, $4, $pop34 +; NO-SIMD128-NEXT: i32.sub $push15=, $pop14, $8 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop15 +; NO-SIMD128-NEXT: i32.extend16_s $push16=, $3 +; NO-SIMD128-NEXT: i32.const $push33=, 15 +; NO-SIMD128-NEXT: i32.shr_s $push32=, $pop16, $pop33 +; NO-SIMD128-NEXT: local.tee $push31=, $8=, $pop32 +; NO-SIMD128-NEXT: i32.xor $push17=, $3, $pop31 ; NO-SIMD128-NEXT: i32.sub $push18=, $pop17, $8 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop18 -; NO-SIMD128-NEXT: i32.const $push22=, 6 -; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22 -; NO-SIMD128-NEXT: i32.extend16_s $push19=, $4 -; NO-SIMD128-NEXT: i32.const $push44=, 15 -; NO-SIMD128-NEXT: 
i32.shr_s $push43=, $pop19, $pop44 -; NO-SIMD128-NEXT: local.tee $push42=, $8=, $pop43 -; NO-SIMD128-NEXT: i32.xor $push20=, $4, $pop42 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop18 +; NO-SIMD128-NEXT: i32.extend16_s $push19=, $2 +; NO-SIMD128-NEXT: i32.const $push30=, 15 +; NO-SIMD128-NEXT: i32.shr_s $push29=, $pop19, $pop30 +; NO-SIMD128-NEXT: local.tee $push28=, $8=, $pop29 +; NO-SIMD128-NEXT: i32.xor $push20=, $2, $pop28 ; NO-SIMD128-NEXT: i32.sub $push21=, $pop20, $8 -; NO-SIMD128-NEXT: i32.store16 0($pop23), $pop21 -; NO-SIMD128-NEXT: i32.extend16_s $push24=, $3 -; NO-SIMD128-NEXT: i32.const $push41=, 15 -; NO-SIMD128-NEXT: i32.shr_s $push40=, $pop24, $pop41 -; NO-SIMD128-NEXT: local.tee $push39=, $8=, $pop40 -; NO-SIMD128-NEXT: i32.xor $push25=, $3, $pop39 -; NO-SIMD128-NEXT: i32.sub $push26=, $pop25, $8 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop26 -; NO-SIMD128-NEXT: i32.extend16_s $push27=, $2 -; NO-SIMD128-NEXT: i32.const $push38=, 15 -; NO-SIMD128-NEXT: i32.shr_s $push37=, $pop27, $pop38 -; NO-SIMD128-NEXT: local.tee $push36=, $8=, $pop37 -; NO-SIMD128-NEXT: i32.xor $push28=, $2, $pop36 -; NO-SIMD128-NEXT: i32.sub $push29=, $pop28, $8 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop29 -; NO-SIMD128-NEXT: i32.extend16_s $push30=, $1 -; NO-SIMD128-NEXT: i32.const $push35=, 15 -; NO-SIMD128-NEXT: i32.shr_s $push34=, $pop30, $pop35 -; NO-SIMD128-NEXT: local.tee $push33=, $8=, $pop34 -; NO-SIMD128-NEXT: i32.xor $push31=, $1, $pop33 -; NO-SIMD128-NEXT: i32.sub $push32=, $pop31, $8 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop32 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop21 +; NO-SIMD128-NEXT: i32.extend16_s $push22=, $1 +; NO-SIMD128-NEXT: i32.const $push27=, 15 +; NO-SIMD128-NEXT: i32.shr_s $push26=, $pop22, $pop27 +; NO-SIMD128-NEXT: local.tee $push25=, $8=, $pop26 +; NO-SIMD128-NEXT: i32.xor $push23=, $1, $pop25 +; NO-SIMD128-NEXT: i32.sub $push24=, $pop23, $8 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop24 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: 
abs_v8i16: @@ -7419,68 +6123,60 @@ define <8 x i16> @abs_v8i16(<8 x i16> %x) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push0=, $1 ; NO-SIMD128-FAST-NEXT: i32.const $push1=, 15 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push55=, $pop0, $pop1 -; NO-SIMD128-FAST-NEXT: local.tee $push54=, $9=, $pop55 -; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $1, $pop54 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push47=, $pop0, $pop1 +; NO-SIMD128-FAST-NEXT: local.tee $push46=, $9=, $pop47 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $1, $pop46 ; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop2, $9 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push4=, $2 -; NO-SIMD128-FAST-NEXT: i32.const $push53=, 15 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push52=, $pop4, $pop53 -; NO-SIMD128-FAST-NEXT: local.tee $push51=, $1=, $pop52 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop51 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 15 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push44=, $pop4, $pop45 +; NO-SIMD128-FAST-NEXT: local.tee $push43=, $1=, $pop44 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop43 ; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop5, $1 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push7=, $3 -; NO-SIMD128-FAST-NEXT: i32.const $push50=, 15 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push49=, $pop7, $pop50 -; NO-SIMD128-FAST-NEXT: local.tee $push48=, $2=, $pop49 -; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $3, $pop48 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 15 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push41=, $pop7, $pop42 +; NO-SIMD128-FAST-NEXT: local.tee $push40=, $2=, $pop41 +; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $3, $pop40 ; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $pop8, $2 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push10=, $4 -; NO-SIMD128-FAST-NEXT: 
i32.const $push47=, 15 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push46=, $pop10, $pop47 -; NO-SIMD128-FAST-NEXT: local.tee $push45=, $3=, $pop46 -; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $4, $pop45 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 15 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push38=, $pop10, $pop39 +; NO-SIMD128-FAST-NEXT: local.tee $push37=, $3=, $pop38 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $4, $pop37 ; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $pop11, $3 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop12 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push15=, $5 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 15 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push43=, $pop15, $pop44 -; NO-SIMD128-FAST-NEXT: local.tee $push42=, $4=, $pop43 -; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $5, $pop42 -; NO-SIMD128-FAST-NEXT: i32.sub $push17=, $pop16, $4 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push18=, $6 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, 15 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push40=, $pop18, $pop41 -; NO-SIMD128-FAST-NEXT: local.tee $push39=, $5=, $pop40 -; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $6, $pop39 -; NO-SIMD128-FAST-NEXT: i32.sub $push20=, $pop19, $5 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push23=, $7 -; NO-SIMD128-FAST-NEXT: i32.const $push38=, 15 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push37=, $pop23, $pop38 -; NO-SIMD128-FAST-NEXT: local.tee $push36=, $6=, $pop37 -; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $7, $pop36 -; NO-SIMD128-FAST-NEXT: i32.sub $push25=, $pop24, $6 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push31=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 -; NO-SIMD128-FAST-NEXT: 
i32.extend16_s $push28=, $8 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push34=, $pop28, $pop35 -; NO-SIMD128-FAST-NEXT: local.tee $push33=, $0=, $pop34 -; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $8, $pop33 -; NO-SIMD128-FAST-NEXT: i32.sub $push30=, $pop29, $0 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop32), $pop30 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push13=, $5 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 15 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push35=, $pop13, $pop36 +; NO-SIMD128-FAST-NEXT: local.tee $push34=, $4=, $pop35 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $5, $pop34 +; NO-SIMD128-FAST-NEXT: i32.sub $push15=, $pop14, $4 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push16=, $6 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 15 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push32=, $pop16, $pop33 +; NO-SIMD128-FAST-NEXT: local.tee $push31=, $5=, $pop32 +; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $6, $pop31 +; NO-SIMD128-FAST-NEXT: i32.sub $push18=, $pop17, $5 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push19=, $7 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 15 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push29=, $pop19, $pop30 +; NO-SIMD128-FAST-NEXT: local.tee $push28=, $6=, $pop29 +; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $7, $pop28 +; NO-SIMD128-FAST-NEXT: i32.sub $push21=, $pop20, $6 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push22=, $8 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 15 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push26=, $pop22, $pop27 +; NO-SIMD128-FAST-NEXT: local.tee $push25=, $7=, $pop26 +; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $8, $pop25 +; NO-SIMD128-FAST-NEXT: i32.sub $push24=, $pop23, $7 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop24 ; NO-SIMD128-FAST-NEXT: return %a = sub <8 x i16> zeroinitializer, %x %b = icmp 
slt <8 x i16> %x, zeroinitializer @@ -7505,37 +6201,29 @@ define <8 x i16> @neg_v8i16(<8 x i16> %x) { ; NO-SIMD128: .functype neg_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 0 -; NO-SIMD128-NEXT: i32.sub $push1=, $pop0, $5 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop1 -; NO-SIMD128-NEXT: i32.const $push23=, 0 -; NO-SIMD128-NEXT: i32.sub $push2=, $pop23, $3 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push22=, 0 -; NO-SIMD128-NEXT: i32.sub $push3=, $pop22, $2 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push21=, 0 -; NO-SIMD128-NEXT: i32.sub $push4=, $pop21, $1 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push6=, 14 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.const $push20=, 0 -; NO-SIMD128-NEXT: i32.sub $push5=, $pop20, $8 -; NO-SIMD128-NEXT: i32.store16 0($pop7), $pop5 -; NO-SIMD128-NEXT: i32.const $push9=, 12 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.const $push19=, 0 -; NO-SIMD128-NEXT: i32.sub $push8=, $pop19, $7 -; NO-SIMD128-NEXT: i32.store16 0($pop10), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 10 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.const $push18=, 0 -; NO-SIMD128-NEXT: i32.sub $push11=, $pop18, $6 -; NO-SIMD128-NEXT: i32.store16 0($pop13), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 6 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.const $push17=, 0 -; NO-SIMD128-NEXT: i32.sub $push14=, $pop17, $4 -; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 +; NO-SIMD128-NEXT: i32.sub $push1=, $pop0, $8 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop1 +; NO-SIMD128-NEXT: i32.const $push15=, 0 +; NO-SIMD128-NEXT: i32.sub $push2=, $pop15, $7 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push14=, 0 +; NO-SIMD128-NEXT: i32.sub $push3=, $pop14, $6 +; 
NO-SIMD128-NEXT: i32.store16 10($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push13=, 0 +; NO-SIMD128-NEXT: i32.sub $push4=, $pop13, $5 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push12=, 0 +; NO-SIMD128-NEXT: i32.sub $push5=, $pop12, $4 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop5 +; NO-SIMD128-NEXT: i32.const $push11=, 0 +; NO-SIMD128-NEXT: i32.sub $push6=, $pop11, $3 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push10=, 0 +; NO-SIMD128-NEXT: i32.sub $push7=, $pop10, $2 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop7 +; NO-SIMD128-NEXT: i32.const $push9=, 0 +; NO-SIMD128-NEXT: i32.sub $push8=, $pop9, $1 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop8 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: neg_v8i16: @@ -7544,35 +6232,27 @@ define <8 x i16> @neg_v8i16(<8 x i16> %x) { ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 0 ; NO-SIMD128-FAST-NEXT: i32.sub $push1=, $pop0, $1 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop23, $2 +; NO-SIMD128-FAST-NEXT: i32.const $push15=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop15, $2 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push22=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop22, $3 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop14, $3 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop21, $4 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop5), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push7=, $pop20, $5 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 -; 
NO-SIMD128-FAST-NEXT: i32.const $push19=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push10=, $pop19, $6 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop9), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push11=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push18=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push13=, $pop18, $7 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop12), $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push14=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push16=, $pop17, $8 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push4=, $pop13, $4 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push12=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $pop12, $5 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.const $push11=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop11, $6 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push7=, $pop10, $7 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push8=, $pop9, $8 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop8 ; NO-SIMD128-FAST-NEXT: return %a = sub <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, %x @@ -7596,64 +6276,48 @@ define <8 x i16> @shl_v8i16(<8 x i16> %v, i16 %x) { ; NO-SIMD128: .functype shl_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 65535 -; NO-SIMD128-NEXT: i32.and $push18=, $9, $pop0 -; NO-SIMD128-NEXT: local.tee $push17=, $9=, $pop18 -; NO-SIMD128-NEXT: i32.shl $push1=, $5, $pop17 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop1 -; NO-SIMD128-NEXT: i32.shl $push2=, 
$3, $9 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop2 -; NO-SIMD128-NEXT: i32.shl $push3=, $2, $9 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop3 -; NO-SIMD128-NEXT: i32.shl $push4=, $1, $9 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push6=, 14 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.shl $push5=, $8, $9 -; NO-SIMD128-NEXT: i32.store16 0($pop7), $pop5 -; NO-SIMD128-NEXT: i32.const $push9=, 12 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.shl $push8=, $7, $9 -; NO-SIMD128-NEXT: i32.store16 0($pop10), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 10 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.shl $push11=, $6, $9 -; NO-SIMD128-NEXT: i32.store16 0($pop13), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 6 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.shl $push14=, $4, $9 -; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 +; NO-SIMD128-NEXT: i32.and $push10=, $9, $pop0 +; NO-SIMD128-NEXT: local.tee $push9=, $9=, $pop10 +; NO-SIMD128-NEXT: i32.shl $push1=, $8, $pop9 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop1 +; NO-SIMD128-NEXT: i32.shl $push2=, $7, $9 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop2 +; NO-SIMD128-NEXT: i32.shl $push3=, $6, $9 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop3 +; NO-SIMD128-NEXT: i32.shl $push4=, $5, $9 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop4 +; NO-SIMD128-NEXT: i32.shl $push5=, $4, $9 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop5 +; NO-SIMD128-NEXT: i32.shl $push6=, $3, $9 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop6 +; NO-SIMD128-NEXT: i32.shl $push7=, $2, $9 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop7 +; NO-SIMD128-NEXT: i32.shl $push8=, $1, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop8 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shl_v8i16: ; NO-SIMD128-FAST: .functype shl_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: ; 
NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push18=, $9, $pop0 -; NO-SIMD128-FAST-NEXT: local.tee $push17=, $9=, $pop18 -; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $2, $pop17 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $9, $pop0 +; NO-SIMD128-FAST-NEXT: local.tee $push9=, $9=, $pop10 +; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $2, $pop9 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $9 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2 ; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $9 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $9 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop5), $pop6 -; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $5, $9 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $6, $9 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop9), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push11=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $7, $9 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop12), $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push14=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $8, $9 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop16 +; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $4, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $5, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $6, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $7, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $8, $9 +; 
NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop8 ; NO-SIMD128-FAST-NEXT: return %t = insertelement <8 x i16> undef, i16 %x, i32 0 %s = shufflevector <8 x i16> %t, <8 x i16> undef, @@ -7681,37 +6345,29 @@ define <8 x i16> @shl_const_v8i16(<8 x i16> %v) { ; NO-SIMD128: .functype shl_const_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 5 -; NO-SIMD128-NEXT: i32.shl $push1=, $5, $pop0 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop1 -; NO-SIMD128-NEXT: i32.const $push23=, 5 -; NO-SIMD128-NEXT: i32.shl $push2=, $3, $pop23 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push22=, 5 -; NO-SIMD128-NEXT: i32.shl $push3=, $2, $pop22 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push21=, 5 -; NO-SIMD128-NEXT: i32.shl $push4=, $1, $pop21 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push6=, 14 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.const $push20=, 5 -; NO-SIMD128-NEXT: i32.shl $push5=, $8, $pop20 -; NO-SIMD128-NEXT: i32.store16 0($pop7), $pop5 -; NO-SIMD128-NEXT: i32.const $push9=, 12 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.const $push19=, 5 -; NO-SIMD128-NEXT: i32.shl $push8=, $7, $pop19 -; NO-SIMD128-NEXT: i32.store16 0($pop10), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 10 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.const $push18=, 5 -; NO-SIMD128-NEXT: i32.shl $push11=, $6, $pop18 -; NO-SIMD128-NEXT: i32.store16 0($pop13), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 6 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.const $push17=, 5 -; NO-SIMD128-NEXT: i32.shl $push14=, $4, $pop17 -; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 +; NO-SIMD128-NEXT: i32.shl $push1=, $8, $pop0 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop1 +; NO-SIMD128-NEXT: i32.const $push15=, 5 +; NO-SIMD128-NEXT: i32.shl 
$push2=, $7, $pop15 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push14=, 5 +; NO-SIMD128-NEXT: i32.shl $push3=, $6, $pop14 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push13=, 5 +; NO-SIMD128-NEXT: i32.shl $push4=, $5, $pop13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push12=, 5 +; NO-SIMD128-NEXT: i32.shl $push5=, $4, $pop12 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop5 +; NO-SIMD128-NEXT: i32.const $push11=, 5 +; NO-SIMD128-NEXT: i32.shl $push6=, $3, $pop11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push10=, 5 +; NO-SIMD128-NEXT: i32.shl $push7=, $2, $pop10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop7 +; NO-SIMD128-NEXT: i32.const $push9=, 5 +; NO-SIMD128-NEXT: i32.shl $push8=, $1, $pop9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop8 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shl_const_v8i16: @@ -7720,35 +6376,27 @@ define <8 x i16> @shl_const_v8i16(<8 x i16> %v) { ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 5 ; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $1, $pop0 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop23 +; NO-SIMD128-FAST-NEXT: i32.const $push15=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop15 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push22=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop14 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $pop21 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop5), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl 
$push7=, $5, $pop20 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push19=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $6, $pop19 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop9), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push11=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push18=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $7, $pop18 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop12), $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push14=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $8, $pop17 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $4, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push12=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $5, $pop12 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.const $push11=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $6, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $7, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $8, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop8 ; NO-SIMD128-FAST-NEXT: return %a = shl <8 x i16> %v, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5> @@ -7866,45 +6514,37 @@ define <8 x i16> @shl_vec_v8i16(<8 x i16> %v, <8 x i16> %x) { ; NO-SIMD128: .functype shl_vec_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 65535 -; 
NO-SIMD128-NEXT: i32.and $push1=, $13, $pop0 -; NO-SIMD128-NEXT: i32.shl $push2=, $5, $pop1 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push31=, 65535 -; NO-SIMD128-NEXT: i32.and $push3=, $11, $pop31 -; NO-SIMD128-NEXT: i32.shl $push4=, $3, $pop3 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push30=, 65535 -; NO-SIMD128-NEXT: i32.and $push5=, $10, $pop30 -; NO-SIMD128-NEXT: i32.shl $push6=, $2, $pop5 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push29=, 65535 -; NO-SIMD128-NEXT: i32.and $push7=, $9, $pop29 -; NO-SIMD128-NEXT: i32.shl $push8=, $1, $pop7 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop8 -; NO-SIMD128-NEXT: i32.const $push11=, 14 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.const $push28=, 65535 -; NO-SIMD128-NEXT: i32.and $push9=, $16, $pop28 -; NO-SIMD128-NEXT: i32.shl $push10=, $8, $pop9 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push15=, 12 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.const $push27=, 65535 -; NO-SIMD128-NEXT: i32.and $push13=, $15, $pop27 -; NO-SIMD128-NEXT: i32.shl $push14=, $7, $pop13 -; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push19=, 10 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.const $push26=, 65535 -; NO-SIMD128-NEXT: i32.and $push17=, $14, $pop26 -; NO-SIMD128-NEXT: i32.shl $push18=, $6, $pop17 -; NO-SIMD128-NEXT: i32.store16 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push23=, 6 -; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-NEXT: i32.const $push25=, 65535 -; NO-SIMD128-NEXT: i32.and $push21=, $12, $pop25 -; NO-SIMD128-NEXT: i32.shl $push22=, $4, $pop21 -; NO-SIMD128-NEXT: i32.store16 0($pop24), $pop22 +; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop0 +; NO-SIMD128-NEXT: i32.shl $push2=, $8, $pop1 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop2 +; 
NO-SIMD128-NEXT: i32.const $push23=, 65535 +; NO-SIMD128-NEXT: i32.and $push3=, $15, $pop23 +; NO-SIMD128-NEXT: i32.shl $push4=, $7, $pop3 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push22=, 65535 +; NO-SIMD128-NEXT: i32.and $push5=, $14, $pop22 +; NO-SIMD128-NEXT: i32.shl $push6=, $6, $pop5 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push21=, 65535 +; NO-SIMD128-NEXT: i32.and $push7=, $13, $pop21 +; NO-SIMD128-NEXT: i32.shl $push8=, $5, $pop7 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push20=, 65535 +; NO-SIMD128-NEXT: i32.and $push9=, $12, $pop20 +; NO-SIMD128-NEXT: i32.shl $push10=, $4, $pop9 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push19=, 65535 +; NO-SIMD128-NEXT: i32.and $push11=, $11, $pop19 +; NO-SIMD128-NEXT: i32.shl $push12=, $3, $pop11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push18=, 65535 +; NO-SIMD128-NEXT: i32.and $push13=, $10, $pop18 +; NO-SIMD128-NEXT: i32.shl $push14=, $2, $pop13 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop14 +; NO-SIMD128-NEXT: i32.const $push17=, 65535 +; NO-SIMD128-NEXT: i32.and $push15=, $9, $pop17 +; NO-SIMD128-NEXT: i32.shl $push16=, $1, $pop15 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop16 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shl_vec_v8i16: @@ -7914,42 +6554,34 @@ define <8 x i16> @shl_vec_v8i16(<8 x i16> %v, <8 x i16> %x) { ; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop0 ; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push31=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push3=, $10, $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $10, $pop23 ; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $2, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push30=, 65535 -; 
NO-SIMD128-FAST-NEXT: i32.and $push5=, $11, $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $11, $pop22 ; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $3, $pop5 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push9=, $12, $pop29 -; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $4, $pop9 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push28=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push11=, $13, $pop28 -; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $5, $pop11 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $14, $pop27 -; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $6, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $15, $pop26 -; NO-SIMD128-FAST-NEXT: i32.shl $push20=, $7, $pop19 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push25=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $16, $pop25 -; NO-SIMD128-FAST-NEXT: i32.shl $push24=, $8, $pop23 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $12, $pop21 +; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $4, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 65535 +; 
NO-SIMD128-FAST-NEXT: i32.and $push9=, $13, $pop20 +; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $5, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $14, $pop19 +; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $6, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $15, $pop18 +; NO-SIMD128-FAST-NEXT: i32.shl $push14=, $7, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $16, $pop17 +; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $8, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop16 ; NO-SIMD128-FAST-NEXT: return %a = shl <8 x i16> %v, %x ret <8 x i16> %a @@ -7971,41 +6603,33 @@ define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) { ; NO-SIMD128-LABEL: shr_s_v8i16: ; NO-SIMD128: .functype shr_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.extend16_s $push1=, $5 +; NO-SIMD128-NEXT: i32.extend16_s $push1=, $8 ; NO-SIMD128-NEXT: i32.const $push0=, 65535 -; NO-SIMD128-NEXT: i32.and $push26=, $9, $pop0 -; NO-SIMD128-NEXT: local.tee $push25=, $9=, $pop26 -; NO-SIMD128-NEXT: i32.shr_s $push2=, $pop1, $pop25 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop2 -; NO-SIMD128-NEXT: i32.extend16_s $push3=, $3 +; NO-SIMD128-NEXT: i32.and $push18=, $9, $pop0 +; NO-SIMD128-NEXT: local.tee $push17=, $9=, $pop18 +; NO-SIMD128-NEXT: i32.shr_s $push2=, $pop1, $pop17 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop2 +; NO-SIMD128-NEXT: i32.extend16_s $push3=, $7 ; NO-SIMD128-NEXT: i32.shr_s $push4=, $pop3, $9 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop4 -; NO-SIMD128-NEXT: i32.extend16_s $push5=, $2 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop4 +; NO-SIMD128-NEXT: i32.extend16_s $push5=, $6 ; NO-SIMD128-NEXT: i32.shr_s $push6=, 
$pop5, $9 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 -; NO-SIMD128-NEXT: i32.extend16_s $push7=, $1 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop6 +; NO-SIMD128-NEXT: i32.extend16_s $push7=, $5 ; NO-SIMD128-NEXT: i32.shr_s $push8=, $pop7, $9 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop8 -; NO-SIMD128-NEXT: i32.const $push11=, 14 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.extend16_s $push9=, $8 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop8 +; NO-SIMD128-NEXT: i32.extend16_s $push9=, $4 ; NO-SIMD128-NEXT: i32.shr_s $push10=, $pop9, $9 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push15=, 12 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.extend16_s $push13=, $7 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop10 +; NO-SIMD128-NEXT: i32.extend16_s $push11=, $3 +; NO-SIMD128-NEXT: i32.shr_s $push12=, $pop11, $9 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop12 +; NO-SIMD128-NEXT: i32.extend16_s $push13=, $2 ; NO-SIMD128-NEXT: i32.shr_s $push14=, $pop13, $9 -; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push19=, 10 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.extend16_s $push17=, $6 -; NO-SIMD128-NEXT: i32.shr_s $push18=, $pop17, $9 -; NO-SIMD128-NEXT: i32.store16 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push23=, 6 -; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-NEXT: i32.extend16_s $push21=, $4 -; NO-SIMD128-NEXT: i32.shr_s $push22=, $pop21, $9 -; NO-SIMD128-NEXT: i32.store16 0($pop24), $pop22 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop14 +; NO-SIMD128-NEXT: i32.extend16_s $push15=, $1 +; NO-SIMD128-NEXT: i32.shr_s $push16=, $pop15, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop16 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_s_v8i16: @@ -8013,9 +6637,9 @@ define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push1=, $1 ; 
NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push26=, $9, $pop0 -; NO-SIMD128-FAST-NEXT: local.tee $push25=, $1=, $pop26 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $pop1, $pop25 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $9, $pop0 +; NO-SIMD128-FAST-NEXT: local.tee $push17=, $1=, $pop18 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $pop1, $pop17 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push3=, $2 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push4=, $pop3, $1 @@ -8023,29 +6647,21 @@ define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) { ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push5=, $3 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push6=, $pop5, $1 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push9=, $4 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push7=, $4 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push8=, $pop7, $1 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push9=, $5 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push10=, $pop9, $1 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop10 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push11=, $5 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push11=, $6 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push12=, $pop11, $1 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push15=, $6 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push13=, $7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push14=, $pop13, $1 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push15=, $8 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push16=, $pop15, $1 -; NO-SIMD128-FAST-NEXT: 
i32.store16 0($pop14), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push19=, $7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push20=, $pop19, $1 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push23=, $8 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push24=, $pop23, $1 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop24 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop16 ; NO-SIMD128-FAST-NEXT: return %t = insertelement <8 x i16> undef, i16 %x, i32 0 %s = shufflevector <8 x i16> %t, <8 x i16> undef, @@ -8164,54 +6780,46 @@ define <8 x i16> @shr_s_vec_v8i16(<8 x i16> %v, <8 x i16> %x) { ; NO-SIMD128-LABEL: shr_s_vec_v8i16: ; NO-SIMD128: .functype shr_s_vec_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.extend16_s $push2=, $5 +; NO-SIMD128-NEXT: i32.extend16_s $push2=, $8 ; NO-SIMD128-NEXT: i32.const $push0=, 65535 -; NO-SIMD128-NEXT: i32.and $push1=, $13, $pop0 +; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop0 ; NO-SIMD128-NEXT: i32.shr_s $push3=, $pop2, $pop1 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 -; NO-SIMD128-NEXT: i32.extend16_s $push5=, $3 -; NO-SIMD128-NEXT: i32.const $push39=, 65535 -; NO-SIMD128-NEXT: i32.and $push4=, $11, $pop39 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop3 +; NO-SIMD128-NEXT: i32.extend16_s $push5=, $7 +; NO-SIMD128-NEXT: i32.const $push31=, 65535 +; NO-SIMD128-NEXT: i32.and $push4=, $15, $pop31 ; NO-SIMD128-NEXT: i32.shr_s $push6=, $pop5, $pop4 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop6 -; NO-SIMD128-NEXT: i32.extend16_s $push8=, $2 -; NO-SIMD128-NEXT: i32.const $push38=, 65535 -; NO-SIMD128-NEXT: i32.and $push7=, $10, $pop38 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop6 +; NO-SIMD128-NEXT: 
i32.extend16_s $push8=, $6 +; NO-SIMD128-NEXT: i32.const $push30=, 65535 +; NO-SIMD128-NEXT: i32.and $push7=, $14, $pop30 ; NO-SIMD128-NEXT: i32.shr_s $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop9 -; NO-SIMD128-NEXT: i32.extend16_s $push11=, $1 -; NO-SIMD128-NEXT: i32.const $push37=, 65535 -; NO-SIMD128-NEXT: i32.and $push10=, $9, $pop37 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop9 +; NO-SIMD128-NEXT: i32.extend16_s $push11=, $5 +; NO-SIMD128-NEXT: i32.const $push29=, 65535 +; NO-SIMD128-NEXT: i32.and $push10=, $13, $pop29 ; NO-SIMD128-NEXT: i32.shr_s $push12=, $pop11, $pop10 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop12 -; NO-SIMD128-NEXT: i32.const $push16=, 14 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.extend16_s $push14=, $8 -; NO-SIMD128-NEXT: i32.const $push36=, 65535 -; NO-SIMD128-NEXT: i32.and $push13=, $16, $pop36 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop12 +; NO-SIMD128-NEXT: i32.extend16_s $push14=, $4 +; NO-SIMD128-NEXT: i32.const $push28=, 65535 +; NO-SIMD128-NEXT: i32.and $push13=, $12, $pop28 ; NO-SIMD128-NEXT: i32.shr_s $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.const $push21=, 12 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.extend16_s $push19=, $7 -; NO-SIMD128-NEXT: i32.const $push35=, 65535 -; NO-SIMD128-NEXT: i32.and $push18=, $15, $pop35 -; NO-SIMD128-NEXT: i32.shr_s $push20=, $pop19, $pop18 -; NO-SIMD128-NEXT: i32.store16 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push26=, 10 -; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-NEXT: i32.extend16_s $push24=, $6 -; NO-SIMD128-NEXT: i32.const $push34=, 65535 -; NO-SIMD128-NEXT: i32.and $push23=, $14, $pop34 -; NO-SIMD128-NEXT: i32.shr_s $push25=, $pop24, $pop23 -; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-NEXT: i32.const $push31=, 6 -; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31 -; NO-SIMD128-NEXT: i32.extend16_s $push29=, 
$4 -; NO-SIMD128-NEXT: i32.const $push33=, 65535 -; NO-SIMD128-NEXT: i32.and $push28=, $12, $pop33 -; NO-SIMD128-NEXT: i32.shr_s $push30=, $pop29, $pop28 -; NO-SIMD128-NEXT: i32.store16 0($pop32), $pop30 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop15 +; NO-SIMD128-NEXT: i32.extend16_s $push17=, $3 +; NO-SIMD128-NEXT: i32.const $push27=, 65535 +; NO-SIMD128-NEXT: i32.and $push16=, $11, $pop27 +; NO-SIMD128-NEXT: i32.shr_s $push18=, $pop17, $pop16 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop18 +; NO-SIMD128-NEXT: i32.extend16_s $push20=, $2 +; NO-SIMD128-NEXT: i32.const $push26=, 65535 +; NO-SIMD128-NEXT: i32.and $push19=, $10, $pop26 +; NO-SIMD128-NEXT: i32.shr_s $push21=, $pop20, $pop19 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop21 +; NO-SIMD128-NEXT: i32.extend16_s $push23=, $1 +; NO-SIMD128-NEXT: i32.const $push25=, 65535 +; NO-SIMD128-NEXT: i32.and $push22=, $9, $pop25 +; NO-SIMD128-NEXT: i32.shr_s $push24=, $pop23, $pop22 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop24 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_s_vec_v8i16: @@ -8223,48 +6831,40 @@ define <8 x i16> @shr_s_vec_v8i16(<8 x i16> %v, <8 x i16> %x) { ; NO-SIMD128-FAST-NEXT: i32.shr_s $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push5=, $2 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop39 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop31 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push6=, $pop5, $pop4 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push8=, $3 -; NO-SIMD128-FAST-NEXT: i32.const $push38=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push7=, $11, $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $11, $pop30 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push9=, $pop8, $pop7 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9 -; 
NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push13=, $4 -; NO-SIMD128-FAST-NEXT: i32.const $push37=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $12, $pop37 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push14=, $pop13, $pop12 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop14 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push16=, $5 -; NO-SIMD128-FAST-NEXT: i32.const $push36=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $13, $pop36 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push17=, $pop16, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push18=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push21=, $6 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $14, $pop35 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push22=, $pop21, $pop20 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop19), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push26=, $7 -; NO-SIMD128-FAST-NEXT: i32.const $push34=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $15, $pop34 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push27=, $pop26, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop24), $pop27 -; NO-SIMD128-FAST-NEXT: i32.const $push28=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push29=, $0, $pop28 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push31=, $8 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push30=, $16, $pop33 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push32=, $pop31, $pop30 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop29), $pop32 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push11=, $4 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $12, $pop29 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push12=, $pop11, $pop10 +; 
NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push14=, $5 +; NO-SIMD128-FAST-NEXT: i32.const $push28=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $13, $pop28 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push15=, $pop14, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push17=, $6 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $14, $pop27 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push18=, $pop17, $pop16 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push20=, $7 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $15, $pop26 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push21=, $pop20, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push23=, $8 +; NO-SIMD128-FAST-NEXT: i32.const $push25=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $16, $pop25 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push24=, $pop23, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop24 ; NO-SIMD128-FAST-NEXT: return %a = ashr <8 x i16> %v, %x ret <8 x i16> %a @@ -8287,48 +6887,40 @@ define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) { ; NO-SIMD128: .functype shr_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 65535 -; NO-SIMD128-NEXT: i32.and $push1=, $5, $pop0 -; NO-SIMD128-NEXT: i32.const $push34=, 65535 -; NO-SIMD128-NEXT: i32.and $push33=, $9, $pop34 -; NO-SIMD128-NEXT: local.tee $push32=, $9=, $pop33 -; NO-SIMD128-NEXT: i32.shr_u $push2=, $pop1, $pop32 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push31=, 65535 -; NO-SIMD128-NEXT: i32.and $push3=, $3, $pop31 +; NO-SIMD128-NEXT: i32.and $push1=, $8, $pop0 +; NO-SIMD128-NEXT: i32.const $push26=, 65535 +; NO-SIMD128-NEXT: i32.and $push25=, $9, $pop26 +; NO-SIMD128-NEXT: 
local.tee $push24=, $9=, $pop25 +; NO-SIMD128-NEXT: i32.shr_u $push2=, $pop1, $pop24 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push23=, 65535 +; NO-SIMD128-NEXT: i32.and $push3=, $7, $pop23 ; NO-SIMD128-NEXT: i32.shr_u $push4=, $pop3, $9 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push30=, 65535 -; NO-SIMD128-NEXT: i32.and $push5=, $2, $pop30 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push22=, 65535 +; NO-SIMD128-NEXT: i32.and $push5=, $6, $pop22 ; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $9 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push29=, 65535 -; NO-SIMD128-NEXT: i32.and $push7=, $1, $pop29 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push21=, 65535 +; NO-SIMD128-NEXT: i32.and $push7=, $5, $pop21 ; NO-SIMD128-NEXT: i32.shr_u $push8=, $pop7, $9 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop8 -; NO-SIMD128-NEXT: i32.const $push11=, 14 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.const $push28=, 65535 -; NO-SIMD128-NEXT: i32.and $push9=, $8, $pop28 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push20=, 65535 +; NO-SIMD128-NEXT: i32.and $push9=, $4, $pop20 ; NO-SIMD128-NEXT: i32.shr_u $push10=, $pop9, $9 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push15=, 12 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.const $push27=, 65535 -; NO-SIMD128-NEXT: i32.and $push13=, $7, $pop27 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push19=, 65535 +; NO-SIMD128-NEXT: i32.and $push11=, $3, $pop19 +; NO-SIMD128-NEXT: i32.shr_u $push12=, $pop11, $9 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push18=, 65535 +; NO-SIMD128-NEXT: i32.and $push13=, $2, $pop18 ; NO-SIMD128-NEXT: i32.shr_u $push14=, $pop13, $9 -; NO-SIMD128-NEXT: i32.store16 
0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push19=, 10 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.const $push26=, 65535 -; NO-SIMD128-NEXT: i32.and $push17=, $6, $pop26 -; NO-SIMD128-NEXT: i32.shr_u $push18=, $pop17, $9 -; NO-SIMD128-NEXT: i32.store16 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push23=, 6 -; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-NEXT: i32.const $push25=, 65535 -; NO-SIMD128-NEXT: i32.and $push21=, $4, $pop25 -; NO-SIMD128-NEXT: i32.shr_u $push22=, $pop21, $9 -; NO-SIMD128-NEXT: i32.store16 0($pop24), $pop22 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop14 +; NO-SIMD128-NEXT: i32.const $push17=, 65535 +; NO-SIMD128-NEXT: i32.and $push15=, $1, $pop17 +; NO-SIMD128-NEXT: i32.shr_u $push16=, $pop15, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop16 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_u_v8i16: @@ -8336,47 +6928,39 @@ define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 ; NO-SIMD128-FAST-NEXT: i32.and $push1=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push34=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push33=, $9, $pop34 -; NO-SIMD128-FAST-NEXT: local.tee $push32=, $1=, $pop33 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $pop1, $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $9, $pop26 +; NO-SIMD128-FAST-NEXT: local.tee $push24=, $1=, $pop25 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $pop1, $pop24 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push31=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push3=, $2, $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $2, $pop23 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push4=, $pop3, $1 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push30=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, 
$3, $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $3, $pop22 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $1 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push7=, $4, $pop29 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $4, $pop21 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push8=, $pop7, $1 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop10), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push28=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push11=, $5, $pop28 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $5, $pop20 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push10=, $pop9, $1 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $6, $pop19 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $1 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push15=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push13=, $6, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $7, $pop18 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push14=, $pop13, $1 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop16), $pop14 -; NO-SIMD128-FAST-NEXT: i32.const $push19=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push17=, $7, $pop26 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push18=, $pop17, $1 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop20), $pop18 
-; NO-SIMD128-FAST-NEXT: i32.const $push23=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-FAST-NEXT: i32.const $push25=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push21=, $8, $pop25 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push22=, $pop21, $1 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop24), $pop22 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $8, $pop17 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push16=, $pop15, $1 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop16 ; NO-SIMD128-FAST-NEXT: return %t = insertelement <8 x i16> undef, i16 %x, i32 0 %s = shufflevector <8 x i16> %t, <8 x i16> undef, @@ -8496,61 +7080,53 @@ define <8 x i16> @shr_u_vec_v8i16(<8 x i16> %v, <8 x i16> %x) { ; NO-SIMD128: .functype shr_u_vec_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 65535 -; NO-SIMD128-NEXT: i32.and $push2=, $5, $pop0 -; NO-SIMD128-NEXT: i32.const $push47=, 65535 -; NO-SIMD128-NEXT: i32.and $push1=, $13, $pop47 -; NO-SIMD128-NEXT: i32.shr_u $push3=, $pop2, $pop1 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push46=, 65535 -; NO-SIMD128-NEXT: i32.and $push5=, $3, $pop46 -; NO-SIMD128-NEXT: i32.const $push45=, 65535 -; NO-SIMD128-NEXT: i32.and $push4=, $11, $pop45 -; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $pop4 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push44=, 65535 -; NO-SIMD128-NEXT: i32.and $push8=, $2, $pop44 -; NO-SIMD128-NEXT: i32.const $push43=, 65535 -; NO-SIMD128-NEXT: i32.and $push7=, $10, $pop43 -; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop9 -; NO-SIMD128-NEXT: i32.const $push42=, 65535 -; NO-SIMD128-NEXT: i32.and $push11=, $1, $pop42 -; NO-SIMD128-NEXT: i32.const $push41=, 65535 -; NO-SIMD128-NEXT: i32.and $push10=, $9, 
$pop41 -; NO-SIMD128-NEXT: i32.shr_u $push12=, $pop11, $pop10 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop12 -; NO-SIMD128-NEXT: i32.const $push16=, 14 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.const $push40=, 65535 -; NO-SIMD128-NEXT: i32.and $push14=, $8, $pop40 +; NO-SIMD128-NEXT: i32.and $push2=, $8, $pop0 ; NO-SIMD128-NEXT: i32.const $push39=, 65535 -; NO-SIMD128-NEXT: i32.and $push13=, $16, $pop39 -; NO-SIMD128-NEXT: i32.shr_u $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.const $push21=, 12 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop39 +; NO-SIMD128-NEXT: i32.shr_u $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop3 ; NO-SIMD128-NEXT: i32.const $push38=, 65535 -; NO-SIMD128-NEXT: i32.and $push19=, $7, $pop38 +; NO-SIMD128-NEXT: i32.and $push5=, $7, $pop38 ; NO-SIMD128-NEXT: i32.const $push37=, 65535 -; NO-SIMD128-NEXT: i32.and $push18=, $15, $pop37 -; NO-SIMD128-NEXT: i32.shr_u $push20=, $pop19, $pop18 -; NO-SIMD128-NEXT: i32.store16 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push26=, 10 -; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-NEXT: i32.and $push4=, $15, $pop37 +; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop6 ; NO-SIMD128-NEXT: i32.const $push36=, 65535 -; NO-SIMD128-NEXT: i32.and $push24=, $6, $pop36 +; NO-SIMD128-NEXT: i32.and $push8=, $6, $pop36 ; NO-SIMD128-NEXT: i32.const $push35=, 65535 -; NO-SIMD128-NEXT: i32.and $push23=, $14, $pop35 -; NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop23 -; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-NEXT: i32.const $push31=, 6 -; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-NEXT: i32.and $push7=, $14, $pop35 +; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop9 ; NO-SIMD128-NEXT: i32.const $push34=, 65535 -; 
NO-SIMD128-NEXT: i32.and $push29=, $4, $pop34 +; NO-SIMD128-NEXT: i32.and $push11=, $5, $pop34 ; NO-SIMD128-NEXT: i32.const $push33=, 65535 -; NO-SIMD128-NEXT: i32.and $push28=, $12, $pop33 -; NO-SIMD128-NEXT: i32.shr_u $push30=, $pop29, $pop28 -; NO-SIMD128-NEXT: i32.store16 0($pop32), $pop30 +; NO-SIMD128-NEXT: i32.and $push10=, $13, $pop33 +; NO-SIMD128-NEXT: i32.shr_u $push12=, $pop11, $pop10 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push32=, 65535 +; NO-SIMD128-NEXT: i32.and $push14=, $4, $pop32 +; NO-SIMD128-NEXT: i32.const $push31=, 65535 +; NO-SIMD128-NEXT: i32.and $push13=, $12, $pop31 +; NO-SIMD128-NEXT: i32.shr_u $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop15 +; NO-SIMD128-NEXT: i32.const $push30=, 65535 +; NO-SIMD128-NEXT: i32.and $push17=, $3, $pop30 +; NO-SIMD128-NEXT: i32.const $push29=, 65535 +; NO-SIMD128-NEXT: i32.and $push16=, $11, $pop29 +; NO-SIMD128-NEXT: i32.shr_u $push18=, $pop17, $pop16 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop18 +; NO-SIMD128-NEXT: i32.const $push28=, 65535 +; NO-SIMD128-NEXT: i32.and $push20=, $2, $pop28 +; NO-SIMD128-NEXT: i32.const $push27=, 65535 +; NO-SIMD128-NEXT: i32.and $push19=, $10, $pop27 +; NO-SIMD128-NEXT: i32.shr_u $push21=, $pop20, $pop19 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop21 +; NO-SIMD128-NEXT: i32.const $push26=, 65535 +; NO-SIMD128-NEXT: i32.and $push23=, $1, $pop26 +; NO-SIMD128-NEXT: i32.const $push25=, 65535 +; NO-SIMD128-NEXT: i32.and $push22=, $9, $pop25 +; NO-SIMD128-NEXT: i32.shr_u $push24=, $pop23, $pop22 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop24 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_u_vec_v8i16: @@ -8558,60 +7134,52 @@ define <8 x i16> @shr_u_vec_v8i16(<8 x i16> %v, <8 x i16> %x) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push1=, 
$9, $pop47 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop39 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop46 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop45 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $pop4 -; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push7=, $11, $pop43 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop7 -; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push42=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop42 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $12, $pop41 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $pop10 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push40=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push16=, $5, $pop40 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $13, $pop39 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 ; NO-SIMD128-FAST-NEXT: i32.const $push38=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $6, $pop38 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop38 ; NO-SIMD128-FAST-NEXT: i32.const $push37=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push18=, $14, $pop37 -; NO-SIMD128-FAST-NEXT: i32.shr_u 
$push20=, $pop19, $pop18 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop37 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.const $push36=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push24=, $7, $pop36 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop36 ; NO-SIMD128-FAST-NEXT: i32.const $push35=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $15, $pop35 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop23 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push31=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $11, $pop35 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9 ; NO-SIMD128-FAST-NEXT: i32.const $push34=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $8, $pop34 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop34 ; NO-SIMD128-FAST-NEXT: i32.const $push33=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push28=, $16, $pop33 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push30=, $pop29, $pop28 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop32), $pop30 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $12, $pop33 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push32=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $5, $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $13, $pop31 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push15=, $pop14, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $6, $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 
65535 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $14, $pop29 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push18=, $pop17, $pop16 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push28=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $7, $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $15, $pop27 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push21=, $pop20, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $8, $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push25=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $16, $pop25 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push24=, $pop23, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop24 ; NO-SIMD128-FAST-NEXT: return %a = lshr <8 x i16> %v, %x ret <8 x i16> %a @@ -8633,30 +7201,22 @@ define <8 x i16> @and_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: and_v8i16: ; NO-SIMD128: .functype and_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.and $push0=, $5, $13 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop0 -; NO-SIMD128-NEXT: i32.and $push1=, $3, $11 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop1 -; NO-SIMD128-NEXT: i32.and $push2=, $2, $10 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop2 -; NO-SIMD128-NEXT: i32.and $push3=, $1, $9 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: i32.and $push4=, $8, $16 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.and $push7=, $7, $15 -; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 -; NO-SIMD128-NEXT: i32.const $push11=, 10 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: 
i32.and $push10=, $6, $14 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push14=, 6 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.and $push13=, $4, $12 -; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13 +; NO-SIMD128-NEXT: i32.and $push0=, $8, $16 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop0 +; NO-SIMD128-NEXT: i32.and $push1=, $7, $15 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop1 +; NO-SIMD128-NEXT: i32.and $push2=, $6, $14 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop2 +; NO-SIMD128-NEXT: i32.and $push3=, $5, $13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 +; NO-SIMD128-NEXT: i32.and $push4=, $4, $12 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop4 +; NO-SIMD128-NEXT: i32.and $push5=, $3, $11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop5 +; NO-SIMD128-NEXT: i32.and $push6=, $2, $10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-NEXT: i32.and $push7=, $1, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: and_v8i16: @@ -8668,24 +7228,16 @@ define <8 x i16> @and_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $3, $11 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $4, $12 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.and $push6=, $5, $13 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.and $push9=, $6, $14 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $7, $15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 -; 
NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $8, $16 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %a = and <8 x i16> %x, %y ret <8 x i16> %a @@ -8707,30 +7259,22 @@ define <8 x i16> @or_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: or_v8i16: ; NO-SIMD128: .functype or_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.or $push0=, $5, $13 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop0 -; NO-SIMD128-NEXT: i32.or $push1=, $3, $11 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop1 -; NO-SIMD128-NEXT: i32.or $push2=, $2, $10 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop2 -; NO-SIMD128-NEXT: i32.or $push3=, $1, $9 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: i32.or $push4=, $8, $16 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.or $push7=, $7, $15 -; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 -; NO-SIMD128-NEXT: i32.const $push11=, 10 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.or $push10=, $6, $14 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push14=, 6 -; 
NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.or $push13=, $4, $12 -; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13 +; NO-SIMD128-NEXT: i32.or $push0=, $8, $16 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop0 +; NO-SIMD128-NEXT: i32.or $push1=, $7, $15 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop1 +; NO-SIMD128-NEXT: i32.or $push2=, $6, $14 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop2 +; NO-SIMD128-NEXT: i32.or $push3=, $5, $13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 +; NO-SIMD128-NEXT: i32.or $push4=, $4, $12 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop4 +; NO-SIMD128-NEXT: i32.or $push5=, $3, $11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop5 +; NO-SIMD128-NEXT: i32.or $push6=, $2, $10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-NEXT: i32.or $push7=, $1, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: or_v8i16: @@ -8742,24 +7286,16 @@ define <8 x i16> @or_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.or $push2=, $3, $11 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.or $push5=, $4, $12 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.or $push6=, $5, $13 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.or $push9=, $6, $14 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.or $push12=, $7, $15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.or $push15=, $8, 
$16 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.or $push3=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.or $push4=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.or $push5=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.or $push6=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.or $push7=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %a = or <8 x i16> %x, %y ret <8 x i16> %a @@ -8781,30 +7317,22 @@ define <8 x i16> @xor_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: xor_v8i16: ; NO-SIMD128: .functype xor_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.xor $push0=, $5, $13 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop0 -; NO-SIMD128-NEXT: i32.xor $push1=, $3, $11 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop1 -; NO-SIMD128-NEXT: i32.xor $push2=, $2, $10 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop2 -; NO-SIMD128-NEXT: i32.xor $push3=, $1, $9 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: i32.xor $push4=, $8, $16 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.xor $push7=, $7, $15 -; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 -; NO-SIMD128-NEXT: i32.const $push11=, 10 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.xor $push10=, $6, $14 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push14=, 6 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.xor $push13=, $4, $12 -; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13 +; 
NO-SIMD128-NEXT: i32.xor $push0=, $8, $16 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop0 +; NO-SIMD128-NEXT: i32.xor $push1=, $7, $15 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop1 +; NO-SIMD128-NEXT: i32.xor $push2=, $6, $14 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop2 +; NO-SIMD128-NEXT: i32.xor $push3=, $5, $13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 +; NO-SIMD128-NEXT: i32.xor $push4=, $4, $12 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop4 +; NO-SIMD128-NEXT: i32.xor $push5=, $3, $11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop5 +; NO-SIMD128-NEXT: i32.xor $push6=, $2, $10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-NEXT: i32.xor $push7=, $1, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: xor_v8i16: @@ -8816,24 +7344,16 @@ define <8 x i16> @xor_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $3, $11 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $4, $12 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $5, $13 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $6, $14 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $7, $15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $8, $16 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $4, $12 +; 
NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %a = xor <8 x i16> %x, %y ret <8 x i16> %a @@ -8856,37 +7376,29 @@ define <8 x i16> @not_v8i16(<8 x i16> %x) { ; NO-SIMD128: .functype not_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, -1 -; NO-SIMD128-NEXT: i32.xor $push1=, $5, $pop0 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop1 -; NO-SIMD128-NEXT: i32.const $push23=, -1 -; NO-SIMD128-NEXT: i32.xor $push2=, $3, $pop23 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push22=, -1 -; NO-SIMD128-NEXT: i32.xor $push3=, $2, $pop22 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push21=, -1 -; NO-SIMD128-NEXT: i32.xor $push4=, $1, $pop21 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push6=, 14 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.const $push20=, -1 -; NO-SIMD128-NEXT: i32.xor $push5=, $8, $pop20 -; NO-SIMD128-NEXT: i32.store16 0($pop7), $pop5 -; NO-SIMD128-NEXT: i32.const $push9=, 12 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.const $push19=, -1 -; NO-SIMD128-NEXT: i32.xor $push8=, $7, $pop19 -; NO-SIMD128-NEXT: i32.store16 0($pop10), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 10 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.const $push18=, -1 -; NO-SIMD128-NEXT: i32.xor $push11=, $6, $pop18 -; NO-SIMD128-NEXT: i32.store16 0($pop13), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 6 -; 
NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.const $push17=, -1 -; NO-SIMD128-NEXT: i32.xor $push14=, $4, $pop17 -; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 +; NO-SIMD128-NEXT: i32.xor $push1=, $8, $pop0 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop1 +; NO-SIMD128-NEXT: i32.const $push15=, -1 +; NO-SIMD128-NEXT: i32.xor $push2=, $7, $pop15 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push14=, -1 +; NO-SIMD128-NEXT: i32.xor $push3=, $6, $pop14 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push13=, -1 +; NO-SIMD128-NEXT: i32.xor $push4=, $5, $pop13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push12=, -1 +; NO-SIMD128-NEXT: i32.xor $push5=, $4, $pop12 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop5 +; NO-SIMD128-NEXT: i32.const $push11=, -1 +; NO-SIMD128-NEXT: i32.xor $push6=, $3, $pop11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push10=, -1 +; NO-SIMD128-NEXT: i32.xor $push7=, $2, $pop10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop7 +; NO-SIMD128-NEXT: i32.const $push9=, -1 +; NO-SIMD128-NEXT: i32.xor $push8=, $1, $pop9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop8 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: not_v8i16: @@ -8895,35 +7407,27 @@ define <8 x i16> @not_v8i16(<8 x i16> %x) { ; NO-SIMD128-FAST-NEXT: i32.const $push0=, -1 ; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop0 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop23 +; NO-SIMD128-FAST-NEXT: i32.const $push15=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop15 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push22=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop14 ; NO-SIMD128-FAST-NEXT: i32.store16 
4($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $4, $pop21 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop5), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $5, $pop20 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $6, $pop19 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop9), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push11=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push18=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $7, $pop18 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop12), $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push14=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $8, $pop17 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $4, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push12=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $5, $pop12 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.const $push11=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $6, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $7, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $8, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop8 ; NO-SIMD128-FAST-NEXT: return %a = xor <8 x i16> %x, 
<i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> @@ -8948,45 +7452,37 @@ define <8 x i16> @andnot_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128: .functype andnot_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, -1 -; NO-SIMD128-NEXT: i32.xor $push1=, $13, $pop0 -; NO-SIMD128-NEXT: i32.and $push2=, $5, $pop1 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push31=, -1 -; NO-SIMD128-NEXT: i32.xor $push3=, $11, $pop31 -; NO-SIMD128-NEXT: i32.and $push4=, $3, $pop3 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push30=, -1 -; NO-SIMD128-NEXT: i32.xor $push5=, $10, $pop30 -; NO-SIMD128-NEXT: i32.and $push6=, $2, $pop5 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push29=, -1 -; NO-SIMD128-NEXT: i32.xor $push7=, $9, $pop29 -; NO-SIMD128-NEXT: i32.and $push8=, $1, $pop7 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop8 -; NO-SIMD128-NEXT: i32.const $push11=, 14 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.const $push28=, -1 -; NO-SIMD128-NEXT: i32.xor $push9=, $16, $pop28 -; NO-SIMD128-NEXT: i32.and $push10=, $8, $pop9 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push15=, 12 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.const $push27=, -1 -; NO-SIMD128-NEXT: i32.xor $push13=, $15, $pop27 -; NO-SIMD128-NEXT: i32.and $push14=, $7, $pop13 -; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push19=, 10 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.const $push26=, -1 -; NO-SIMD128-NEXT: i32.xor $push17=, $14, $pop26 -; NO-SIMD128-NEXT: i32.and $push18=, $6, $pop17 -; NO-SIMD128-NEXT: i32.store16 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push23=, 6 -; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-NEXT: 
i32.const $push25=, -1 -; NO-SIMD128-NEXT: i32.xor $push21=, $12, $pop25 -; NO-SIMD128-NEXT: i32.and $push22=, $4, $pop21 -; NO-SIMD128-NEXT: i32.store16 0($pop24), $pop22 +; NO-SIMD128-NEXT: i32.xor $push1=, $16, $pop0 +; NO-SIMD128-NEXT: i32.and $push2=, $8, $pop1 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push23=, -1 +; NO-SIMD128-NEXT: i32.xor $push3=, $15, $pop23 +; NO-SIMD128-NEXT: i32.and $push4=, $7, $pop3 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push22=, -1 +; NO-SIMD128-NEXT: i32.xor $push5=, $14, $pop22 +; NO-SIMD128-NEXT: i32.and $push6=, $6, $pop5 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push21=, -1 +; NO-SIMD128-NEXT: i32.xor $push7=, $13, $pop21 +; NO-SIMD128-NEXT: i32.and $push8=, $5, $pop7 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push20=, -1 +; NO-SIMD128-NEXT: i32.xor $push9=, $12, $pop20 +; NO-SIMD128-NEXT: i32.and $push10=, $4, $pop9 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push19=, -1 +; NO-SIMD128-NEXT: i32.xor $push11=, $11, $pop19 +; NO-SIMD128-NEXT: i32.and $push12=, $3, $pop11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push18=, -1 +; NO-SIMD128-NEXT: i32.xor $push13=, $10, $pop18 +; NO-SIMD128-NEXT: i32.and $push14=, $2, $pop13 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop14 +; NO-SIMD128-NEXT: i32.const $push17=, -1 +; NO-SIMD128-NEXT: i32.xor $push15=, $9, $pop17 +; NO-SIMD128-NEXT: i32.and $push16=, $1, $pop15 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop16 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: andnot_v8i16: @@ -8996,42 +7492,34 @@ define <8 x i16> @andnot_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $9, $pop0 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push31=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor 
$push3=, $10, $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $10, $pop23 ; NO-SIMD128-FAST-NEXT: i32.and $push4=, $2, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push30=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $11, $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $11, $pop22 ; NO-SIMD128-FAST-NEXT: i32.and $push6=, $3, $pop5 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $12, $pop29 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $4, $pop9 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push28=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $13, $pop28 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $5, $pop11 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $14, $pop27 -; NO-SIMD128-FAST-NEXT: i32.and $push16=, $6, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $15, $pop26 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $7, $pop19 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push25=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $16, $pop25 -; NO-SIMD128-FAST-NEXT: i32.and $push24=, $8, $pop23 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop24 +; 
NO-SIMD128-FAST-NEXT: i32.const $push21=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $12, $pop21 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $4, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $13, $pop20 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $5, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $14, $pop19 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $6, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $15, $pop18 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $7, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $16, $pop17 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $8, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop16 ; NO-SIMD128-FAST-NEXT: return %inv_y = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> @@ -9058,62 +7546,54 @@ define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128-LABEL: bitselect_v8i16: ; NO-SIMD128: .functype bitselect_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.and $push0=, $16, $8 ; NO-SIMD128-NEXT: i32.const $push1=, -1 ; NO-SIMD128-NEXT: i32.xor $push2=, $8, $pop1 ; NO-SIMD128-NEXT: i32.and $push3=, $24, $pop2 ; NO-SIMD128-NEXT: i32.or $push4=, $pop0, $pop3 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push11=, 12 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.and $push7=, $15, $7 
-; NO-SIMD128-NEXT: i32.const $push47=, -1 -; NO-SIMD128-NEXT: i32.xor $push8=, $7, $pop47 -; NO-SIMD128-NEXT: i32.and $push9=, $23, $pop8 -; NO-SIMD128-NEXT: i32.or $push10=, $pop7, $pop9 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push17=, 10 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.and $push13=, $14, $6 -; NO-SIMD128-NEXT: i32.const $push46=, -1 -; NO-SIMD128-NEXT: i32.xor $push14=, $6, $pop46 -; NO-SIMD128-NEXT: i32.and $push15=, $22, $pop14 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop4 +; NO-SIMD128-NEXT: i32.and $push5=, $15, $7 +; NO-SIMD128-NEXT: i32.const $push39=, -1 +; NO-SIMD128-NEXT: i32.xor $push6=, $7, $pop39 +; NO-SIMD128-NEXT: i32.and $push7=, $23, $pop6 +; NO-SIMD128-NEXT: i32.or $push8=, $pop5, $pop7 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop8 +; NO-SIMD128-NEXT: i32.and $push9=, $14, $6 +; NO-SIMD128-NEXT: i32.const $push38=, -1 +; NO-SIMD128-NEXT: i32.xor $push10=, $6, $pop38 +; NO-SIMD128-NEXT: i32.and $push11=, $22, $pop10 +; NO-SIMD128-NEXT: i32.or $push12=, $pop9, $pop11 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop12 +; NO-SIMD128-NEXT: i32.and $push13=, $13, $5 +; NO-SIMD128-NEXT: i32.const $push37=, -1 +; NO-SIMD128-NEXT: i32.xor $push14=, $5, $pop37 +; NO-SIMD128-NEXT: i32.and $push15=, $21, $pop14 ; NO-SIMD128-NEXT: i32.or $push16=, $pop13, $pop15 -; NO-SIMD128-NEXT: i32.store16 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.and $push19=, $13, $5 -; NO-SIMD128-NEXT: i32.const $push45=, -1 -; NO-SIMD128-NEXT: i32.xor $push20=, $5, $pop45 -; NO-SIMD128-NEXT: i32.and $push21=, $21, $pop20 -; NO-SIMD128-NEXT: i32.or $push22=, $pop19, $pop21 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop22 -; NO-SIMD128-NEXT: i32.const $push27=, 6 -; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-NEXT: i32.and $push23=, $12, $4 -; NO-SIMD128-NEXT: i32.const $push44=, -1 -; NO-SIMD128-NEXT: i32.xor $push24=, $4, $pop44 -; NO-SIMD128-NEXT: i32.and $push25=, $20, $pop24 -; 
NO-SIMD128-NEXT: i32.or $push26=, $pop23, $pop25 -; NO-SIMD128-NEXT: i32.store16 0($pop28), $pop26 -; NO-SIMD128-NEXT: i32.and $push29=, $11, $3 -; NO-SIMD128-NEXT: i32.const $push43=, -1 -; NO-SIMD128-NEXT: i32.xor $push30=, $3, $pop43 -; NO-SIMD128-NEXT: i32.and $push31=, $19, $pop30 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop16 +; NO-SIMD128-NEXT: i32.and $push17=, $12, $4 +; NO-SIMD128-NEXT: i32.const $push36=, -1 +; NO-SIMD128-NEXT: i32.xor $push18=, $4, $pop36 +; NO-SIMD128-NEXT: i32.and $push19=, $20, $pop18 +; NO-SIMD128-NEXT: i32.or $push20=, $pop17, $pop19 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop20 +; NO-SIMD128-NEXT: i32.and $push21=, $11, $3 +; NO-SIMD128-NEXT: i32.const $push35=, -1 +; NO-SIMD128-NEXT: i32.xor $push22=, $3, $pop35 +; NO-SIMD128-NEXT: i32.and $push23=, $19, $pop22 +; NO-SIMD128-NEXT: i32.or $push24=, $pop21, $pop23 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop24 +; NO-SIMD128-NEXT: i32.and $push25=, $10, $2 +; NO-SIMD128-NEXT: i32.const $push34=, -1 +; NO-SIMD128-NEXT: i32.xor $push26=, $2, $pop34 +; NO-SIMD128-NEXT: i32.and $push27=, $18, $pop26 +; NO-SIMD128-NEXT: i32.or $push28=, $pop25, $pop27 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop28 +; NO-SIMD128-NEXT: i32.and $push29=, $9, $1 +; NO-SIMD128-NEXT: i32.const $push33=, -1 +; NO-SIMD128-NEXT: i32.xor $push30=, $1, $pop33 +; NO-SIMD128-NEXT: i32.and $push31=, $17, $pop30 ; NO-SIMD128-NEXT: i32.or $push32=, $pop29, $pop31 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop32 -; NO-SIMD128-NEXT: i32.and $push33=, $10, $2 -; NO-SIMD128-NEXT: i32.const $push42=, -1 -; NO-SIMD128-NEXT: i32.xor $push34=, $2, $pop42 -; NO-SIMD128-NEXT: i32.and $push35=, $18, $pop34 -; NO-SIMD128-NEXT: i32.or $push36=, $pop33, $pop35 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop36 -; NO-SIMD128-NEXT: i32.and $push37=, $9, $1 -; NO-SIMD128-NEXT: i32.const $push41=, -1 -; NO-SIMD128-NEXT: i32.xor $push38=, $1, $pop41 -; NO-SIMD128-NEXT: i32.and $push39=, $17, $pop38 -; NO-SIMD128-NEXT: i32.or $push40=, $pop37, $pop39 
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop40 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop32 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: bitselect_v8i16: @@ -9126,55 +7606,47 @@ define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128-FAST-NEXT: i32.or $push4=, $pop0, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop4 ; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $2 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop47 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop39 ; NO-SIMD128-FAST-NEXT: i32.and $push7=, $18, $pop6 ; NO-SIMD128-FAST-NEXT: i32.or $push8=, $pop5, $pop7 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop8 ; NO-SIMD128-FAST-NEXT: i32.and $push9=, $11, $3 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop46 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop38 ; NO-SIMD128-FAST-NEXT: i32.and $push11=, $19, $pop10 ; NO-SIMD128-FAST-NEXT: i32.or $push12=, $pop9, $pop11 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 ; NO-SIMD128-FAST-NEXT: i32.and $push13=, $12, $4 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop45 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop37 ; NO-SIMD128-FAST-NEXT: i32.and $push15=, $20, $pop14 ; NO-SIMD128-FAST-NEXT: i32.or $push16=, $pop13, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop16 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $13, $5 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $5, $pop44 -; NO-SIMD128-FAST-NEXT: i32.and $push21=, $21, $pop20 -; NO-SIMD128-FAST-NEXT: i32.or $push22=, $pop19, $pop21 -; NO-SIMD128-FAST-NEXT: 
i32.store16 8($0), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $14, $6 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $6, $pop43 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $22, $pop24 -; NO-SIMD128-FAST-NEXT: i32.or $push26=, $pop23, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $15, $7 -; NO-SIMD128-FAST-NEXT: i32.const $push42=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $7, $pop42 -; NO-SIMD128-FAST-NEXT: i32.and $push31=, $23, $pop30 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $13, $5 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $5, $pop36 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $21, $pop18 +; NO-SIMD128-FAST-NEXT: i32.or $push20=, $pop17, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $14, $6 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $6, $pop35 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $22, $pop22 +; NO-SIMD128-FAST-NEXT: i32.or $push24=, $pop21, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $15, $7 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $7, $pop34 +; NO-SIMD128-FAST-NEXT: i32.and $push27=, $23, $pop26 +; NO-SIMD128-FAST-NEXT: i32.or $push28=, $pop25, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $16, $8 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $8, $pop33 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $24, $pop30 ; NO-SIMD128-FAST-NEXT: i32.or 
$push32=, $pop29, $pop31 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop34), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.and $push35=, $16, $8 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $8, $pop41 -; NO-SIMD128-FAST-NEXT: i32.and $push37=, $24, $pop36 -; NO-SIMD128-FAST-NEXT: i32.or $push38=, $pop35, $pop37 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop40), $pop38 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop32 ; NO-SIMD128-FAST-NEXT: return %masked_v1 = and <8 x i16> %v1, %c %inv_mask = xor <8 x i16> @@ -9203,46 +7675,38 @@ define <8 x i16> @bitselect_xor_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2 ; NO-SIMD128-LABEL: bitselect_xor_v8i16: ; NO-SIMD128: .functype bitselect_xor_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push3=, 14 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 ; NO-SIMD128-NEXT: i32.xor $push0=, $16, $24 ; NO-SIMD128-NEXT: i32.and $push1=, $pop0, $8 ; NO-SIMD128-NEXT: i32.xor $push2=, $pop1, $24 -; NO-SIMD128-NEXT: i32.store16 0($pop4), $pop2 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.xor $push5=, $15, $23 -; NO-SIMD128-NEXT: i32.and $push6=, $pop5, $7 -; NO-SIMD128-NEXT: i32.xor $push7=, $pop6, $23 -; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 -; NO-SIMD128-NEXT: i32.const $push13=, 10 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.xor $push10=, $14, $22 -; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $6 -; NO-SIMD128-NEXT: i32.xor $push12=, $pop11, $22 -; NO-SIMD128-NEXT: i32.store16 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.xor $push15=, $13, $21 -; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $5 -; NO-SIMD128-NEXT: i32.xor $push17=, $pop16, $21 -; 
NO-SIMD128-NEXT: i32.store16 8($0), $pop17 -; NO-SIMD128-NEXT: i32.const $push21=, 6 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.xor $push18=, $12, $20 -; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $4 -; NO-SIMD128-NEXT: i32.xor $push20=, $pop19, $20 -; NO-SIMD128-NEXT: i32.store16 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.xor $push23=, $11, $19 -; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $3 -; NO-SIMD128-NEXT: i32.xor $push25=, $pop24, $19 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop25 -; NO-SIMD128-NEXT: i32.xor $push26=, $10, $18 -; NO-SIMD128-NEXT: i32.and $push27=, $pop26, $2 -; NO-SIMD128-NEXT: i32.xor $push28=, $pop27, $18 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop28 -; NO-SIMD128-NEXT: i32.xor $push29=, $9, $17 -; NO-SIMD128-NEXT: i32.and $push30=, $pop29, $1 -; NO-SIMD128-NEXT: i32.xor $push31=, $pop30, $17 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop31 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop2 +; NO-SIMD128-NEXT: i32.xor $push3=, $15, $23 +; NO-SIMD128-NEXT: i32.and $push4=, $pop3, $7 +; NO-SIMD128-NEXT: i32.xor $push5=, $pop4, $23 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop5 +; NO-SIMD128-NEXT: i32.xor $push6=, $14, $22 +; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $6 +; NO-SIMD128-NEXT: i32.xor $push8=, $pop7, $22 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop8 +; NO-SIMD128-NEXT: i32.xor $push9=, $13, $21 +; NO-SIMD128-NEXT: i32.and $push10=, $pop9, $5 +; NO-SIMD128-NEXT: i32.xor $push11=, $pop10, $21 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop11 +; NO-SIMD128-NEXT: i32.xor $push12=, $12, $20 +; NO-SIMD128-NEXT: i32.and $push13=, $pop12, $4 +; NO-SIMD128-NEXT: i32.xor $push14=, $pop13, $20 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop14 +; NO-SIMD128-NEXT: i32.xor $push15=, $11, $19 +; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $3 +; NO-SIMD128-NEXT: i32.xor $push17=, $pop16, $19 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop17 +; NO-SIMD128-NEXT: i32.xor $push18=, $10, $18 +; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $2 +; 
NO-SIMD128-NEXT: i32.xor $push20=, $pop19, $18 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop20 +; NO-SIMD128-NEXT: i32.xor $push21=, $9, $17 +; NO-SIMD128-NEXT: i32.and $push22=, $pop21, $1 +; NO-SIMD128-NEXT: i32.xor $push23=, $pop22, $17 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop23 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: bitselect_xor_v8i16: @@ -9260,34 +7724,26 @@ define <8 x i16> @bitselect_xor_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2 ; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $3 ; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $19 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $12, $20 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $4 -; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $pop12, $20 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop10), $pop13 -; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $13, $21 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $5 -; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $pop15, $21 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $14, $22 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $pop19, $6 -; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $pop20, $22 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push22=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push23=, $0, $pop22 -; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $15, $23 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $7 -; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $pop25, $23 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop23), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $16, $24 -; NO-SIMD128-FAST-NEXT: i32.and $push30=, $pop29, $8 -; 
NO-SIMD128-FAST-NEXT: i32.xor $push31=, $pop30, $24 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop31 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $12, $20 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $pop9, $4 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $pop10, $20 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $13, $21 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $pop12, $5 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $pop13, $21 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $14, $22 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $pop15, $6 +; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $pop16, $22 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $15, $23 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $pop18, $7 +; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $pop19, $23 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $16, $24 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $8 +; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $pop22, $24 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop23 ; NO-SIMD128-FAST-NEXT: return %xor1 = xor <8 x i16> %v1, %v2 %and = and <8 x i16> %xor1, %c @@ -9314,62 +7770,54 @@ define <8 x i16> @bitselect_xor_reversed_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x ; NO-SIMD128-LABEL: bitselect_xor_reversed_v8i16: ; NO-SIMD128: .functype bitselect_xor_reversed_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.xor $push2=, $16, $24 ; NO-SIMD128-NEXT: i32.const $push0=, -1 ; NO-SIMD128-NEXT: i32.xor $push1=, $8, $pop0 ; NO-SIMD128-NEXT: i32.and $push3=, $pop2, $pop1 ; NO-SIMD128-NEXT: i32.xor $push4=, $pop3, $24 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; 
NO-SIMD128-NEXT: i32.const $push11=, 12 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.xor $push8=, $15, $23 -; NO-SIMD128-NEXT: i32.const $push47=, -1 -; NO-SIMD128-NEXT: i32.xor $push7=, $7, $pop47 -; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.xor $push10=, $pop9, $23 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push17=, 10 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.xor $push14=, $14, $22 -; NO-SIMD128-NEXT: i32.const $push46=, -1 -; NO-SIMD128-NEXT: i32.xor $push13=, $6, $pop46 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop4 +; NO-SIMD128-NEXT: i32.xor $push6=, $15, $23 +; NO-SIMD128-NEXT: i32.const $push39=, -1 +; NO-SIMD128-NEXT: i32.xor $push5=, $7, $pop39 +; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $pop5 +; NO-SIMD128-NEXT: i32.xor $push8=, $pop7, $23 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop8 +; NO-SIMD128-NEXT: i32.xor $push10=, $14, $22 +; NO-SIMD128-NEXT: i32.const $push38=, -1 +; NO-SIMD128-NEXT: i32.xor $push9=, $6, $pop38 +; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $pop9 +; NO-SIMD128-NEXT: i32.xor $push12=, $pop11, $22 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop12 +; NO-SIMD128-NEXT: i32.xor $push14=, $13, $21 +; NO-SIMD128-NEXT: i32.const $push37=, -1 +; NO-SIMD128-NEXT: i32.xor $push13=, $5, $pop37 ; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.xor $push16=, $pop15, $22 -; NO-SIMD128-NEXT: i32.store16 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.xor $push20=, $13, $21 -; NO-SIMD128-NEXT: i32.const $push45=, -1 -; NO-SIMD128-NEXT: i32.xor $push19=, $5, $pop45 -; NO-SIMD128-NEXT: i32.and $push21=, $pop20, $pop19 -; NO-SIMD128-NEXT: i32.xor $push22=, $pop21, $21 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop22 -; NO-SIMD128-NEXT: i32.const $push27=, 6 -; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-NEXT: i32.xor $push24=, $12, $20 -; NO-SIMD128-NEXT: i32.const $push44=, -1 -; NO-SIMD128-NEXT: 
i32.xor $push23=, $4, $pop44 -; NO-SIMD128-NEXT: i32.and $push25=, $pop24, $pop23 -; NO-SIMD128-NEXT: i32.xor $push26=, $pop25, $20 -; NO-SIMD128-NEXT: i32.store16 0($pop28), $pop26 -; NO-SIMD128-NEXT: i32.xor $push30=, $11, $19 -; NO-SIMD128-NEXT: i32.const $push43=, -1 -; NO-SIMD128-NEXT: i32.xor $push29=, $3, $pop43 +; NO-SIMD128-NEXT: i32.xor $push16=, $pop15, $21 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop16 +; NO-SIMD128-NEXT: i32.xor $push18=, $12, $20 +; NO-SIMD128-NEXT: i32.const $push36=, -1 +; NO-SIMD128-NEXT: i32.xor $push17=, $4, $pop36 +; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $pop17 +; NO-SIMD128-NEXT: i32.xor $push20=, $pop19, $20 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop20 +; NO-SIMD128-NEXT: i32.xor $push22=, $11, $19 +; NO-SIMD128-NEXT: i32.const $push35=, -1 +; NO-SIMD128-NEXT: i32.xor $push21=, $3, $pop35 +; NO-SIMD128-NEXT: i32.and $push23=, $pop22, $pop21 +; NO-SIMD128-NEXT: i32.xor $push24=, $pop23, $19 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop24 +; NO-SIMD128-NEXT: i32.xor $push26=, $10, $18 +; NO-SIMD128-NEXT: i32.const $push34=, -1 +; NO-SIMD128-NEXT: i32.xor $push25=, $2, $pop34 +; NO-SIMD128-NEXT: i32.and $push27=, $pop26, $pop25 +; NO-SIMD128-NEXT: i32.xor $push28=, $pop27, $18 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop28 +; NO-SIMD128-NEXT: i32.xor $push30=, $9, $17 +; NO-SIMD128-NEXT: i32.const $push33=, -1 +; NO-SIMD128-NEXT: i32.xor $push29=, $1, $pop33 ; NO-SIMD128-NEXT: i32.and $push31=, $pop30, $pop29 -; NO-SIMD128-NEXT: i32.xor $push32=, $pop31, $19 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop32 -; NO-SIMD128-NEXT: i32.xor $push34=, $10, $18 -; NO-SIMD128-NEXT: i32.const $push42=, -1 -; NO-SIMD128-NEXT: i32.xor $push33=, $2, $pop42 -; NO-SIMD128-NEXT: i32.and $push35=, $pop34, $pop33 -; NO-SIMD128-NEXT: i32.xor $push36=, $pop35, $18 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop36 -; NO-SIMD128-NEXT: i32.xor $push38=, $9, $17 -; NO-SIMD128-NEXT: i32.const $push41=, -1 -; NO-SIMD128-NEXT: i32.xor $push37=, $1, $pop41 -; 
NO-SIMD128-NEXT: i32.and $push39=, $pop38, $pop37 -; NO-SIMD128-NEXT: i32.xor $push40=, $pop39, $17 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop40 +; NO-SIMD128-NEXT: i32.xor $push32=, $pop31, $17 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop32 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: bitselect_xor_reversed_v8i16: @@ -9382,55 +7830,47 @@ define <8 x i16> @bitselect_xor_reversed_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x ; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $pop3, $17 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop4 ; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $10, $18 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop47 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop39 ; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $pop5 ; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $18 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop8 ; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $11, $19 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $3, $pop46 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $3, $pop38 ; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $pop9 ; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $pop11, $19 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 ; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $12, $20 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $4, $pop45 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $4, $pop37 ; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $pop13 ; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $pop15, $20 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop16 -; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $13, $21 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, -1 -; NO-SIMD128-FAST-NEXT: 
i32.xor $push19=, $5, $pop44 -; NO-SIMD128-FAST-NEXT: i32.and $push21=, $pop20, $pop19 -; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $pop21, $21 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $14, $22 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $6, $pop43 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $pop23 -; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $pop25, $22 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $15, $23 -; NO-SIMD128-FAST-NEXT: i32.const $push42=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $7, $pop42 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $13, $21 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $5, $pop36 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $pop18, $pop17 +; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $pop19, $21 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $14, $22 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $6, $pop35 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $pop22, $pop21 +; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $pop23, $22 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $15, $23 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push25=, $7, $pop34 +; NO-SIMD128-FAST-NEXT: i32.and $push27=, $pop26, $pop25 +; NO-SIMD128-FAST-NEXT: i32.xor $push28=, $pop27, $23 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $16, $24 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, -1 +; 
NO-SIMD128-FAST-NEXT: i32.xor $push29=, $8, $pop33 ; NO-SIMD128-FAST-NEXT: i32.and $push31=, $pop30, $pop29 -; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $pop31, $23 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop34), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $16, $24 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push35=, $8, $pop41 -; NO-SIMD128-FAST-NEXT: i32.and $push37=, $pop36, $pop35 -; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $pop37, $24 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop40), $pop38 +; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $pop31, $24 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop32 ; NO-SIMD128-FAST-NEXT: return %xor1 = xor <8 x i16> %v1, %v2 %notc = xor <8 x i16> %c, <i16 -1, i16 -1, i16 -1, i16 -1, @@ -9458,46 +7898,38 @@ define <8 x i16> @extmul_low_s_v8i16(<16 x i8> %v1, <16 x i8> %v2) { ; NO-SIMD128-LABEL: extmul_low_s_v8i16: ; NO-SIMD128: .functype extmul_low_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.extend8_s $push1=, $5 -; NO-SIMD128-NEXT: i32.extend8_s $push0=, $21 +; NO-SIMD128-NEXT: i32.extend8_s $push1=, $8 +; NO-SIMD128-NEXT: i32.extend8_s $push0=, $24 ; NO-SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop2 -; NO-SIMD128-NEXT: i32.extend8_s $push4=, $3 -; NO-SIMD128-NEXT: i32.extend8_s $push3=, $19 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop2 +; NO-SIMD128-NEXT: i32.extend8_s $push4=, $7 +; NO-SIMD128-NEXT: i32.extend8_s $push3=, $23 ; NO-SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop5 -; NO-SIMD128-NEXT: i32.extend8_s $push7=, $2 -; NO-SIMD128-NEXT: i32.extend8_s $push6=, $18 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop5 +; 
NO-SIMD128-NEXT: i32.extend8_s $push7=, $6 +; NO-SIMD128-NEXT: i32.extend8_s $push6=, $22 ; NO-SIMD128-NEXT: i32.mul $push8=, $pop7, $pop6 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop8 -; NO-SIMD128-NEXT: i32.extend8_s $push10=, $1 -; NO-SIMD128-NEXT: i32.extend8_s $push9=, $17 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop8 +; NO-SIMD128-NEXT: i32.extend8_s $push10=, $5 +; NO-SIMD128-NEXT: i32.extend8_s $push9=, $21 ; NO-SIMD128-NEXT: i32.mul $push11=, $pop10, $pop9 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 14 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.extend8_s $push13=, $8 -; NO-SIMD128-NEXT: i32.extend8_s $push12=, $24 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop11 +; NO-SIMD128-NEXT: i32.extend8_s $push13=, $4 +; NO-SIMD128-NEXT: i32.extend8_s $push12=, $20 ; NO-SIMD128-NEXT: i32.mul $push14=, $pop13, $pop12 -; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push20=, 12 -; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-NEXT: i32.extend8_s $push18=, $7 -; NO-SIMD128-NEXT: i32.extend8_s $push17=, $23 -; NO-SIMD128-NEXT: i32.mul $push19=, $pop18, $pop17 -; NO-SIMD128-NEXT: i32.store16 0($pop21), $pop19 -; NO-SIMD128-NEXT: i32.const $push25=, 10 -; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 -; NO-SIMD128-NEXT: i32.extend8_s $push23=, $6 -; NO-SIMD128-NEXT: i32.extend8_s $push22=, $22 -; NO-SIMD128-NEXT: i32.mul $push24=, $pop23, $pop22 -; NO-SIMD128-NEXT: i32.store16 0($pop26), $pop24 -; NO-SIMD128-NEXT: i32.const $push30=, 6 -; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-NEXT: i32.extend8_s $push28=, $4 -; NO-SIMD128-NEXT: i32.extend8_s $push27=, $20 -; NO-SIMD128-NEXT: i32.mul $push29=, $pop28, $pop27 -; NO-SIMD128-NEXT: i32.store16 0($pop31), $pop29 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop14 +; NO-SIMD128-NEXT: i32.extend8_s $push16=, $3 +; NO-SIMD128-NEXT: i32.extend8_s $push15=, $19 +; NO-SIMD128-NEXT: i32.mul $push17=, 
$pop16, $pop15 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop17 +; NO-SIMD128-NEXT: i32.extend8_s $push19=, $2 +; NO-SIMD128-NEXT: i32.extend8_s $push18=, $18 +; NO-SIMD128-NEXT: i32.mul $push20=, $pop19, $pop18 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop20 +; NO-SIMD128-NEXT: i32.extend8_s $push22=, $1 +; NO-SIMD128-NEXT: i32.extend8_s $push21=, $17 +; NO-SIMD128-NEXT: i32.mul $push23=, $pop22, $pop21 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop23 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: extmul_low_s_v8i16: @@ -9515,34 +7947,26 @@ define <8 x i16> @extmul_low_s_v8i16(<16 x i8> %v1, <16 x i8> %v2) { ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push6=, $19 ; NO-SIMD128-FAST-NEXT: i32.mul $push8=, $pop7, $pop6 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $4 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push11=, $20 -; NO-SIMD128-FAST-NEXT: i32.mul $push13=, $pop12, $pop11 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop10), $pop13 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $5 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push14=, $21 -; NO-SIMD128-FAST-NEXT: i32.mul $push16=, $pop15, $pop14 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push20=, $6 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $22 -; NO-SIMD128-FAST-NEXT: i32.mul $push21=, $pop20, $pop19 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push22=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push23=, $0, $pop22 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $7 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push24=, $23 -; NO-SIMD128-FAST-NEXT: i32.mul $push26=, $pop25, $pop24 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop23), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 14 -; 
NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push30=, $8 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $24 -; NO-SIMD128-FAST-NEXT: i32.mul $push31=, $pop30, $pop29 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop31 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push10=, $4 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push9=, $20 +; NO-SIMD128-FAST-NEXT: i32.mul $push11=, $pop10, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $5 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $21 +; NO-SIMD128-FAST-NEXT: i32.mul $push14=, $pop13, $pop12 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push16=, $6 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $22 +; NO-SIMD128-FAST-NEXT: i32.mul $push17=, $pop16, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $7 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push18=, $23 +; NO-SIMD128-FAST-NEXT: i32.mul $push20=, $pop19, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push22=, $8 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push21=, $24 +; NO-SIMD128-FAST-NEXT: i32.mul $push23=, $pop22, $pop21 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop23 ; NO-SIMD128-FAST-NEXT: return %low1 = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> @@ -9572,46 +7996,38 @@ define <8 x i16> @extmul_high_s_v8i16(<16 x i8> %v1, <16 x i8> %v2) { ; NO-SIMD128-LABEL: extmul_high_s_v8i16: ; NO-SIMD128: .functype extmul_high_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.extend8_s $push1=, $13 -; NO-SIMD128-NEXT: i32.extend8_s $push0=, $29 +; NO-SIMD128-NEXT: i32.extend8_s 
$push1=, $16 +; NO-SIMD128-NEXT: i32.extend8_s $push0=, $32 ; NO-SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop2 -; NO-SIMD128-NEXT: i32.extend8_s $push4=, $11 -; NO-SIMD128-NEXT: i32.extend8_s $push3=, $27 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop2 +; NO-SIMD128-NEXT: i32.extend8_s $push4=, $15 +; NO-SIMD128-NEXT: i32.extend8_s $push3=, $31 ; NO-SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop5 -; NO-SIMD128-NEXT: i32.extend8_s $push7=, $10 -; NO-SIMD128-NEXT: i32.extend8_s $push6=, $26 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop5 +; NO-SIMD128-NEXT: i32.extend8_s $push7=, $14 +; NO-SIMD128-NEXT: i32.extend8_s $push6=, $30 ; NO-SIMD128-NEXT: i32.mul $push8=, $pop7, $pop6 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop8 -; NO-SIMD128-NEXT: i32.extend8_s $push10=, $9 -; NO-SIMD128-NEXT: i32.extend8_s $push9=, $25 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop8 +; NO-SIMD128-NEXT: i32.extend8_s $push10=, $13 +; NO-SIMD128-NEXT: i32.extend8_s $push9=, $29 ; NO-SIMD128-NEXT: i32.mul $push11=, $pop10, $pop9 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 14 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.extend8_s $push13=, $16 -; NO-SIMD128-NEXT: i32.extend8_s $push12=, $32 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop11 +; NO-SIMD128-NEXT: i32.extend8_s $push13=, $12 +; NO-SIMD128-NEXT: i32.extend8_s $push12=, $28 ; NO-SIMD128-NEXT: i32.mul $push14=, $pop13, $pop12 -; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push20=, 12 -; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-NEXT: i32.extend8_s $push18=, $15 -; NO-SIMD128-NEXT: i32.extend8_s $push17=, $31 -; NO-SIMD128-NEXT: i32.mul $push19=, $pop18, $pop17 -; NO-SIMD128-NEXT: i32.store16 0($pop21), $pop19 -; NO-SIMD128-NEXT: i32.const $push25=, 10 -; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 -; NO-SIMD128-NEXT: i32.extend8_s 
$push23=, $14 -; NO-SIMD128-NEXT: i32.extend8_s $push22=, $30 -; NO-SIMD128-NEXT: i32.mul $push24=, $pop23, $pop22 -; NO-SIMD128-NEXT: i32.store16 0($pop26), $pop24 -; NO-SIMD128-NEXT: i32.const $push30=, 6 -; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-NEXT: i32.extend8_s $push28=, $12 -; NO-SIMD128-NEXT: i32.extend8_s $push27=, $28 -; NO-SIMD128-NEXT: i32.mul $push29=, $pop28, $pop27 -; NO-SIMD128-NEXT: i32.store16 0($pop31), $pop29 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop14 +; NO-SIMD128-NEXT: i32.extend8_s $push16=, $11 +; NO-SIMD128-NEXT: i32.extend8_s $push15=, $27 +; NO-SIMD128-NEXT: i32.mul $push17=, $pop16, $pop15 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop17 +; NO-SIMD128-NEXT: i32.extend8_s $push19=, $10 +; NO-SIMD128-NEXT: i32.extend8_s $push18=, $26 +; NO-SIMD128-NEXT: i32.mul $push20=, $pop19, $pop18 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop20 +; NO-SIMD128-NEXT: i32.extend8_s $push22=, $9 +; NO-SIMD128-NEXT: i32.extend8_s $push21=, $25 +; NO-SIMD128-NEXT: i32.mul $push23=, $pop22, $pop21 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop23 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: extmul_high_s_v8i16: @@ -9629,34 +8045,26 @@ define <8 x i16> @extmul_high_s_v8i16(<16 x i8> %v1, <16 x i8> %v2) { ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push6=, $27 ; NO-SIMD128-FAST-NEXT: i32.mul $push8=, $pop7, $pop6 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $12 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push11=, $28 -; NO-SIMD128-FAST-NEXT: i32.mul $push13=, $pop12, $pop11 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop10), $pop13 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $13 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push14=, $29 -; NO-SIMD128-FAST-NEXT: i32.mul $push16=, $pop15, $pop14 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 10 -; 
NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push20=, $14 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $30 -; NO-SIMD128-FAST-NEXT: i32.mul $push21=, $pop20, $pop19 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push22=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push23=, $0, $pop22 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $15 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push24=, $31 -; NO-SIMD128-FAST-NEXT: i32.mul $push26=, $pop25, $pop24 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop23), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push30=, $16 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $32 -; NO-SIMD128-FAST-NEXT: i32.mul $push31=, $pop30, $pop29 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop31 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push10=, $12 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push9=, $28 +; NO-SIMD128-FAST-NEXT: i32.mul $push11=, $pop10, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $13 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $29 +; NO-SIMD128-FAST-NEXT: i32.mul $push14=, $pop13, $pop12 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push16=, $14 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $30 +; NO-SIMD128-FAST-NEXT: i32.mul $push17=, $pop16, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $15 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push18=, $31 +; NO-SIMD128-FAST-NEXT: i32.mul $push20=, $pop19, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push22=, $16 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push21=, $32 +; NO-SIMD128-FAST-NEXT: i32.mul $push23=, $pop22, $pop21 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop23 ; 
NO-SIMD128-FAST-NEXT: return %high1 = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> @@ -9687,61 +8095,53 @@ define <8 x i16> @extmul_low_u_v8i16(<16 x i8> %v1, <16 x i8> %v2) { ; NO-SIMD128: .functype extmul_low_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 255 -; NO-SIMD128-NEXT: i32.and $push2=, $5, $pop0 -; NO-SIMD128-NEXT: i32.const $push47=, 255 -; NO-SIMD128-NEXT: i32.and $push1=, $21, $pop47 -; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push46=, 255 -; NO-SIMD128-NEXT: i32.and $push5=, $3, $pop46 -; NO-SIMD128-NEXT: i32.const $push45=, 255 -; NO-SIMD128-NEXT: i32.and $push4=, $19, $pop45 -; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push44=, 255 -; NO-SIMD128-NEXT: i32.and $push8=, $2, $pop44 -; NO-SIMD128-NEXT: i32.const $push43=, 255 -; NO-SIMD128-NEXT: i32.and $push7=, $18, $pop43 -; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop9 -; NO-SIMD128-NEXT: i32.const $push42=, 255 -; NO-SIMD128-NEXT: i32.and $push11=, $1, $pop42 -; NO-SIMD128-NEXT: i32.const $push41=, 255 -; NO-SIMD128-NEXT: i32.and $push10=, $17, $pop41 -; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop12 -; NO-SIMD128-NEXT: i32.const $push16=, 14 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.const $push40=, 255 -; NO-SIMD128-NEXT: i32.and $push14=, $8, $pop40 +; NO-SIMD128-NEXT: i32.and $push2=, $8, $pop0 ; NO-SIMD128-NEXT: i32.const $push39=, 255 -; NO-SIMD128-NEXT: i32.and $push13=, $24, $pop39 -; NO-SIMD128-NEXT: i32.mul $push15=, $pop14, $pop13 -; 
NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.const $push21=, 12 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-NEXT: i32.and $push1=, $24, $pop39 +; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop3 ; NO-SIMD128-NEXT: i32.const $push38=, 255 -; NO-SIMD128-NEXT: i32.and $push19=, $7, $pop38 +; NO-SIMD128-NEXT: i32.and $push5=, $7, $pop38 ; NO-SIMD128-NEXT: i32.const $push37=, 255 -; NO-SIMD128-NEXT: i32.and $push18=, $23, $pop37 -; NO-SIMD128-NEXT: i32.mul $push20=, $pop19, $pop18 -; NO-SIMD128-NEXT: i32.store16 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push26=, 10 -; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-NEXT: i32.and $push4=, $23, $pop37 +; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop6 ; NO-SIMD128-NEXT: i32.const $push36=, 255 -; NO-SIMD128-NEXT: i32.and $push24=, $6, $pop36 +; NO-SIMD128-NEXT: i32.and $push8=, $6, $pop36 ; NO-SIMD128-NEXT: i32.const $push35=, 255 -; NO-SIMD128-NEXT: i32.and $push23=, $22, $pop35 -; NO-SIMD128-NEXT: i32.mul $push25=, $pop24, $pop23 -; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-NEXT: i32.const $push31=, 6 -; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-NEXT: i32.and $push7=, $22, $pop35 +; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop9 ; NO-SIMD128-NEXT: i32.const $push34=, 255 -; NO-SIMD128-NEXT: i32.and $push29=, $4, $pop34 +; NO-SIMD128-NEXT: i32.and $push11=, $5, $pop34 ; NO-SIMD128-NEXT: i32.const $push33=, 255 -; NO-SIMD128-NEXT: i32.and $push28=, $20, $pop33 -; NO-SIMD128-NEXT: i32.mul $push30=, $pop29, $pop28 -; NO-SIMD128-NEXT: i32.store16 0($pop32), $pop30 +; NO-SIMD128-NEXT: i32.and $push10=, $21, $pop33 +; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push32=, 255 +; NO-SIMD128-NEXT: i32.and $push14=, 
$4, $pop32 +; NO-SIMD128-NEXT: i32.const $push31=, 255 +; NO-SIMD128-NEXT: i32.and $push13=, $20, $pop31 +; NO-SIMD128-NEXT: i32.mul $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop15 +; NO-SIMD128-NEXT: i32.const $push30=, 255 +; NO-SIMD128-NEXT: i32.and $push17=, $3, $pop30 +; NO-SIMD128-NEXT: i32.const $push29=, 255 +; NO-SIMD128-NEXT: i32.and $push16=, $19, $pop29 +; NO-SIMD128-NEXT: i32.mul $push18=, $pop17, $pop16 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop18 +; NO-SIMD128-NEXT: i32.const $push28=, 255 +; NO-SIMD128-NEXT: i32.and $push20=, $2, $pop28 +; NO-SIMD128-NEXT: i32.const $push27=, 255 +; NO-SIMD128-NEXT: i32.and $push19=, $18, $pop27 +; NO-SIMD128-NEXT: i32.mul $push21=, $pop20, $pop19 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop21 +; NO-SIMD128-NEXT: i32.const $push26=, 255 +; NO-SIMD128-NEXT: i32.and $push23=, $1, $pop26 +; NO-SIMD128-NEXT: i32.const $push25=, 255 +; NO-SIMD128-NEXT: i32.and $push22=, $17, $pop25 +; NO-SIMD128-NEXT: i32.mul $push24=, $pop23, $pop22 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop24 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: extmul_low_u_v8i16: @@ -9749,60 +8149,52 @@ define <8 x i16> @extmul_low_u_v8i16(<16 x i8> %v1, <16 x i8> %v2) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop47 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop39 ; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop46 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, $pop45 -; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 -; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6 -; 
NO-SIMD128-FAST-NEXT: i32.const $push44=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop43 -; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7 -; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push42=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop42 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $20, $pop41 -; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $pop11, $pop10 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push40=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push16=, $5, $pop40 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $21, $pop39 -; NO-SIMD128-FAST-NEXT: i32.mul $push17=, $pop16, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 ; NO-SIMD128-FAST-NEXT: i32.const $push38=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $6, $pop38 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop38 ; NO-SIMD128-FAST-NEXT: i32.const $push37=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push18=, $22, $pop37 -; NO-SIMD128-FAST-NEXT: i32.mul $push20=, $pop19, $pop18 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, $pop37 +; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.const $push36=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push24=, $7, $pop36 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop36 ; NO-SIMD128-FAST-NEXT: i32.const $push35=, 255 -; 
NO-SIMD128-FAST-NEXT: i32.and $push23=, $23, $pop35 -; NO-SIMD128-FAST-NEXT: i32.mul $push25=, $pop24, $pop23 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push31=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop35 +; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9 ; NO-SIMD128-FAST-NEXT: i32.const $push34=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $8, $pop34 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop34 ; NO-SIMD128-FAST-NEXT: i32.const $push33=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push28=, $24, $pop33 -; NO-SIMD128-FAST-NEXT: i32.mul $push30=, $pop29, $pop28 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop32), $pop30 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $20, $pop33 +; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $pop11, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push32=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $5, $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $21, $pop31 +; NO-SIMD128-FAST-NEXT: i32.mul $push15=, $pop14, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $6, $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $22, $pop29 +; NO-SIMD128-FAST-NEXT: i32.mul $push18=, $pop17, $pop16 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push28=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $7, $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $23, $pop27 +; NO-SIMD128-FAST-NEXT: i32.mul $push21=, $pop20, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 255 +; NO-SIMD128-FAST-NEXT: i32.and 
$push23=, $8, $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push25=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $24, $pop25 +; NO-SIMD128-FAST-NEXT: i32.mul $push24=, $pop23, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop24 ; NO-SIMD128-FAST-NEXT: return %low1 = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> @@ -9833,61 +8225,53 @@ define <8 x i16> @extmul_high_u_v8i16(<16 x i8> %v1, <16 x i8> %v2) { ; NO-SIMD128: .functype extmul_high_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 255 -; NO-SIMD128-NEXT: i32.and $push2=, $13, $pop0 -; NO-SIMD128-NEXT: i32.const $push47=, 255 -; NO-SIMD128-NEXT: i32.and $push1=, $29, $pop47 -; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push46=, 255 -; NO-SIMD128-NEXT: i32.and $push5=, $11, $pop46 -; NO-SIMD128-NEXT: i32.const $push45=, 255 -; NO-SIMD128-NEXT: i32.and $push4=, $27, $pop45 -; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push44=, 255 -; NO-SIMD128-NEXT: i32.and $push8=, $10, $pop44 -; NO-SIMD128-NEXT: i32.const $push43=, 255 -; NO-SIMD128-NEXT: i32.and $push7=, $26, $pop43 -; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop9 -; NO-SIMD128-NEXT: i32.const $push42=, 255 -; NO-SIMD128-NEXT: i32.and $push11=, $9, $pop42 -; NO-SIMD128-NEXT: i32.const $push41=, 255 -; NO-SIMD128-NEXT: i32.and $push10=, $25, $pop41 -; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop12 -; NO-SIMD128-NEXT: i32.const $push16=, 14 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.const $push40=, 255 -; 
NO-SIMD128-NEXT: i32.and $push14=, $16, $pop40 +; NO-SIMD128-NEXT: i32.and $push2=, $16, $pop0 ; NO-SIMD128-NEXT: i32.const $push39=, 255 -; NO-SIMD128-NEXT: i32.and $push13=, $32, $pop39 -; NO-SIMD128-NEXT: i32.mul $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.const $push21=, 12 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop39 +; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop3 ; NO-SIMD128-NEXT: i32.const $push38=, 255 -; NO-SIMD128-NEXT: i32.and $push19=, $15, $pop38 +; NO-SIMD128-NEXT: i32.and $push5=, $15, $pop38 ; NO-SIMD128-NEXT: i32.const $push37=, 255 -; NO-SIMD128-NEXT: i32.and $push18=, $31, $pop37 -; NO-SIMD128-NEXT: i32.mul $push20=, $pop19, $pop18 -; NO-SIMD128-NEXT: i32.store16 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push26=, 10 -; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-NEXT: i32.and $push4=, $31, $pop37 +; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop6 ; NO-SIMD128-NEXT: i32.const $push36=, 255 -; NO-SIMD128-NEXT: i32.and $push24=, $14, $pop36 +; NO-SIMD128-NEXT: i32.and $push8=, $14, $pop36 ; NO-SIMD128-NEXT: i32.const $push35=, 255 -; NO-SIMD128-NEXT: i32.and $push23=, $30, $pop35 -; NO-SIMD128-NEXT: i32.mul $push25=, $pop24, $pop23 -; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-NEXT: i32.const $push31=, 6 -; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-NEXT: i32.and $push7=, $30, $pop35 +; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop9 ; NO-SIMD128-NEXT: i32.const $push34=, 255 -; NO-SIMD128-NEXT: i32.and $push29=, $12, $pop34 +; NO-SIMD128-NEXT: i32.and $push11=, $13, $pop34 ; NO-SIMD128-NEXT: i32.const $push33=, 255 -; NO-SIMD128-NEXT: i32.and $push28=, $28, $pop33 -; NO-SIMD128-NEXT: i32.mul $push30=, $pop29, $pop28 -; NO-SIMD128-NEXT: i32.store16 
0($pop32), $pop30 +; NO-SIMD128-NEXT: i32.and $push10=, $29, $pop33 +; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push32=, 255 +; NO-SIMD128-NEXT: i32.and $push14=, $12, $pop32 +; NO-SIMD128-NEXT: i32.const $push31=, 255 +; NO-SIMD128-NEXT: i32.and $push13=, $28, $pop31 +; NO-SIMD128-NEXT: i32.mul $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop15 +; NO-SIMD128-NEXT: i32.const $push30=, 255 +; NO-SIMD128-NEXT: i32.and $push17=, $11, $pop30 +; NO-SIMD128-NEXT: i32.const $push29=, 255 +; NO-SIMD128-NEXT: i32.and $push16=, $27, $pop29 +; NO-SIMD128-NEXT: i32.mul $push18=, $pop17, $pop16 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop18 +; NO-SIMD128-NEXT: i32.const $push28=, 255 +; NO-SIMD128-NEXT: i32.and $push20=, $10, $pop28 +; NO-SIMD128-NEXT: i32.const $push27=, 255 +; NO-SIMD128-NEXT: i32.and $push19=, $26, $pop27 +; NO-SIMD128-NEXT: i32.mul $push21=, $pop20, $pop19 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop21 +; NO-SIMD128-NEXT: i32.const $push26=, 255 +; NO-SIMD128-NEXT: i32.and $push23=, $9, $pop26 +; NO-SIMD128-NEXT: i32.const $push25=, 255 +; NO-SIMD128-NEXT: i32.and $push22=, $25, $pop25 +; NO-SIMD128-NEXT: i32.mul $push24=, $pop23, $pop22 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop24 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: extmul_high_u_v8i16: @@ -9895,60 +8279,52 @@ define <8 x i16> @extmul_high_u_v8i16(<16 x i8> %v1, <16 x i8> %v2) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $9, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push1=, $25, $pop47 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $25, $pop39 ; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 255 -; NO-SIMD128-FAST-NEXT: 
i32.and $push5=, $10, $pop46 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push4=, $26, $pop45 -; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 -; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push8=, $11, $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push7=, $27, $pop43 -; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7 -; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push42=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push11=, $12, $pop42 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $28, $pop41 -; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $pop11, $pop10 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push40=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push16=, $13, $pop40 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $29, $pop39 -; NO-SIMD128-FAST-NEXT: i32.mul $push17=, $pop16, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 ; NO-SIMD128-FAST-NEXT: i32.const $push38=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $14, $pop38 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $pop38 ; NO-SIMD128-FAST-NEXT: i32.const $push37=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push18=, $30, $pop37 -; NO-SIMD128-FAST-NEXT: i32.mul $push20=, $pop19, $pop18 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $26, $pop37 +; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: 
i32.store16 2($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.const $push36=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push24=, $15, $pop36 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $11, $pop36 ; NO-SIMD128-FAST-NEXT: i32.const $push35=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $31, $pop35 -; NO-SIMD128-FAST-NEXT: i32.mul $push25=, $pop24, $pop23 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push31=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $27, $pop35 +; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9 ; NO-SIMD128-FAST-NEXT: i32.const $push34=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $16, $pop34 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $12, $pop34 ; NO-SIMD128-FAST-NEXT: i32.const $push33=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push28=, $32, $pop33 -; NO-SIMD128-FAST-NEXT: i32.mul $push30=, $pop29, $pop28 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop32), $pop30 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $28, $pop33 +; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $pop11, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push32=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $13, $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $29, $pop31 +; NO-SIMD128-FAST-NEXT: i32.mul $push15=, $pop14, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $14, $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $30, $pop29 +; NO-SIMD128-FAST-NEXT: i32.mul $push18=, $pop17, $pop16 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push28=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $15, $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 255 +; 
NO-SIMD128-FAST-NEXT: i32.and $push19=, $31, $pop27 +; NO-SIMD128-FAST-NEXT: i32.mul $push21=, $pop20, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $16, $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push25=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $32, $pop25 +; NO-SIMD128-FAST-NEXT: i32.mul $push24=, $pop23, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop24 ; NO-SIMD128-FAST-NEXT: return %high1 = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> @@ -9979,16 +8355,14 @@ define <4 x i32> @add_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: add_v4i32: ; NO-SIMD128: .functype add_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.add $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.add $push1=, $2, $6 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.add $push2=, $1, $5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.add $push3=, $4, $8 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.add $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.add $push1=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.add $push2=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.add $push3=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: add_v4i32: @@ -10000,10 +8374,8 @@ define <4 x i32> @add_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.add $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; 
NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.add $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = add <4 x i32> %x, %y ret <4 x i32> %a @@ -10025,16 +8397,14 @@ define <4 x i32> @sub_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: sub_v4i32: ; NO-SIMD128: .functype sub_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.sub $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.sub $push1=, $2, $6 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.sub $push2=, $1, $5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.sub $push3=, $4, $8 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.sub $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.sub $push1=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.sub $push2=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.sub $push3=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: sub_v4i32: @@ -10046,10 +8416,8 @@ define <4 x i32> @sub_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = sub <4 x i32> %x, %y ret <4 x i32> %a @@ 
-10071,16 +8439,14 @@ define <4 x i32> @mul_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: mul_v4i32: ; NO-SIMD128: .functype mul_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.mul $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.mul $push1=, $2, $6 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.mul $push2=, $1, $5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.mul $push3=, $4, $8 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.mul $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.mul $push1=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.mul $push2=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.mul $push3=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: mul_v4i32: @@ -10092,10 +8458,8 @@ define <4 x i32> @mul_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.mul $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = mul <4 x i32> %x, %y ret <4 x i32> %a @@ -10117,20 +8481,18 @@ define <4 x i32> @min_s_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: min_s_v4i32: ; NO-SIMD128: .functype min_s_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.lt_s $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.select $push1=, $3, $7, 
$pop0 -; NO-SIMD128-NEXT: i32.store 8($0), $pop1 -; NO-SIMD128-NEXT: i32.lt_s $push2=, $2, $6 -; NO-SIMD128-NEXT: i32.select $push3=, $2, $6, $pop2 -; NO-SIMD128-NEXT: i32.store 4($0), $pop3 -; NO-SIMD128-NEXT: i32.lt_s $push4=, $1, $5 -; NO-SIMD128-NEXT: i32.select $push5=, $1, $5, $pop4 -; NO-SIMD128-NEXT: i32.store 0($0), $pop5 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.lt_s $push6=, $4, $8 -; NO-SIMD128-NEXT: i32.select $push7=, $4, $8, $pop6 -; NO-SIMD128-NEXT: i32.store 0($pop9), $pop7 +; NO-SIMD128-NEXT: i32.lt_s $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.select $push1=, $4, $8, $pop0 +; NO-SIMD128-NEXT: i32.store 12($0), $pop1 +; NO-SIMD128-NEXT: i32.lt_s $push2=, $3, $7 +; NO-SIMD128-NEXT: i32.select $push3=, $3, $7, $pop2 +; NO-SIMD128-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-NEXT: i32.lt_s $push4=, $2, $6 +; NO-SIMD128-NEXT: i32.select $push5=, $2, $6, $pop4 +; NO-SIMD128-NEXT: i32.store 4($0), $pop5 +; NO-SIMD128-NEXT: i32.lt_s $push6=, $1, $5 +; NO-SIMD128-NEXT: i32.select $push7=, $1, $5, $pop6 +; NO-SIMD128-NEXT: i32.store 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: min_s_v4i32: @@ -10145,11 +8507,9 @@ define <4 x i32> @min_s_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: i32.lt_s $push4=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.select $push5=, $3, $7, $pop4 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop5 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 ; NO-SIMD128-FAST-NEXT: i32.lt_s $push6=, $4, $8 ; NO-SIMD128-FAST-NEXT: i32.select $push7=, $4, $8, $pop6 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop9), $pop7 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %c = icmp slt <4 x i32> %x, %y %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y @@ -10172,20 +8532,18 @@ define <4 x i32> @min_u_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: min_u_v4i32: ; NO-SIMD128: 
.functype min_u_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.lt_u $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.select $push1=, $3, $7, $pop0 -; NO-SIMD128-NEXT: i32.store 8($0), $pop1 -; NO-SIMD128-NEXT: i32.lt_u $push2=, $2, $6 -; NO-SIMD128-NEXT: i32.select $push3=, $2, $6, $pop2 -; NO-SIMD128-NEXT: i32.store 4($0), $pop3 -; NO-SIMD128-NEXT: i32.lt_u $push4=, $1, $5 -; NO-SIMD128-NEXT: i32.select $push5=, $1, $5, $pop4 -; NO-SIMD128-NEXT: i32.store 0($0), $pop5 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.lt_u $push6=, $4, $8 -; NO-SIMD128-NEXT: i32.select $push7=, $4, $8, $pop6 -; NO-SIMD128-NEXT: i32.store 0($pop9), $pop7 +; NO-SIMD128-NEXT: i32.lt_u $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.select $push1=, $4, $8, $pop0 +; NO-SIMD128-NEXT: i32.store 12($0), $pop1 +; NO-SIMD128-NEXT: i32.lt_u $push2=, $3, $7 +; NO-SIMD128-NEXT: i32.select $push3=, $3, $7, $pop2 +; NO-SIMD128-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-NEXT: i32.lt_u $push4=, $2, $6 +; NO-SIMD128-NEXT: i32.select $push5=, $2, $6, $pop4 +; NO-SIMD128-NEXT: i32.store 4($0), $pop5 +; NO-SIMD128-NEXT: i32.lt_u $push6=, $1, $5 +; NO-SIMD128-NEXT: i32.select $push7=, $1, $5, $pop6 +; NO-SIMD128-NEXT: i32.store 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: min_u_v4i32: @@ -10200,11 +8558,9 @@ define <4 x i32> @min_u_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: i32.lt_u $push4=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.select $push5=, $3, $7, $pop4 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop5 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push6=, $4, $8 ; NO-SIMD128-FAST-NEXT: i32.select $push7=, $4, $8, $pop6 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop9), $pop7 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %c = icmp ult <4 x i32> %x, 
%y %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y @@ -10227,20 +8583,18 @@ define <4 x i32> @max_s_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: max_s_v4i32: ; NO-SIMD128: .functype max_s_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.gt_s $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.select $push1=, $3, $7, $pop0 -; NO-SIMD128-NEXT: i32.store 8($0), $pop1 -; NO-SIMD128-NEXT: i32.gt_s $push2=, $2, $6 -; NO-SIMD128-NEXT: i32.select $push3=, $2, $6, $pop2 -; NO-SIMD128-NEXT: i32.store 4($0), $pop3 -; NO-SIMD128-NEXT: i32.gt_s $push4=, $1, $5 -; NO-SIMD128-NEXT: i32.select $push5=, $1, $5, $pop4 -; NO-SIMD128-NEXT: i32.store 0($0), $pop5 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.gt_s $push6=, $4, $8 -; NO-SIMD128-NEXT: i32.select $push7=, $4, $8, $pop6 -; NO-SIMD128-NEXT: i32.store 0($pop9), $pop7 +; NO-SIMD128-NEXT: i32.gt_s $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.select $push1=, $4, $8, $pop0 +; NO-SIMD128-NEXT: i32.store 12($0), $pop1 +; NO-SIMD128-NEXT: i32.gt_s $push2=, $3, $7 +; NO-SIMD128-NEXT: i32.select $push3=, $3, $7, $pop2 +; NO-SIMD128-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-NEXT: i32.gt_s $push4=, $2, $6 +; NO-SIMD128-NEXT: i32.select $push5=, $2, $6, $pop4 +; NO-SIMD128-NEXT: i32.store 4($0), $pop5 +; NO-SIMD128-NEXT: i32.gt_s $push6=, $1, $5 +; NO-SIMD128-NEXT: i32.select $push7=, $1, $5, $pop6 +; NO-SIMD128-NEXT: i32.store 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_s_v4i32: @@ -10255,11 +8609,9 @@ define <4 x i32> @max_s_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: i32.gt_s $push4=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.select $push5=, $3, $7, $pop4 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop5 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 ; NO-SIMD128-FAST-NEXT: i32.gt_s $push6=, $4, $8 ; NO-SIMD128-FAST-NEXT: 
i32.select $push7=, $4, $8, $pop6 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop9), $pop7 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %c = icmp sgt <4 x i32> %x, %y %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y @@ -10282,20 +8634,18 @@ define <4 x i32> @max_u_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: max_u_v4i32: ; NO-SIMD128: .functype max_u_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.gt_u $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.select $push1=, $3, $7, $pop0 -; NO-SIMD128-NEXT: i32.store 8($0), $pop1 -; NO-SIMD128-NEXT: i32.gt_u $push2=, $2, $6 -; NO-SIMD128-NEXT: i32.select $push3=, $2, $6, $pop2 -; NO-SIMD128-NEXT: i32.store 4($0), $pop3 -; NO-SIMD128-NEXT: i32.gt_u $push4=, $1, $5 -; NO-SIMD128-NEXT: i32.select $push5=, $1, $5, $pop4 -; NO-SIMD128-NEXT: i32.store 0($0), $pop5 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.gt_u $push6=, $4, $8 -; NO-SIMD128-NEXT: i32.select $push7=, $4, $8, $pop6 -; NO-SIMD128-NEXT: i32.store 0($pop9), $pop7 +; NO-SIMD128-NEXT: i32.gt_u $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.select $push1=, $4, $8, $pop0 +; NO-SIMD128-NEXT: i32.store 12($0), $pop1 +; NO-SIMD128-NEXT: i32.gt_u $push2=, $3, $7 +; NO-SIMD128-NEXT: i32.select $push3=, $3, $7, $pop2 +; NO-SIMD128-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-NEXT: i32.gt_u $push4=, $2, $6 +; NO-SIMD128-NEXT: i32.select $push5=, $2, $6, $pop4 +; NO-SIMD128-NEXT: i32.store 4($0), $pop5 +; NO-SIMD128-NEXT: i32.gt_u $push6=, $1, $5 +; NO-SIMD128-NEXT: i32.select $push7=, $1, $5, $pop6 +; NO-SIMD128-NEXT: i32.store 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_u_v4i32: @@ -10310,11 +8660,9 @@ define <4 x i32> @max_u_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: i32.gt_u $push4=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.select $push5=, $3, $7, $pop4 ; NO-SIMD128-FAST-NEXT: 
i32.store 8($0), $pop5 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push6=, $4, $8 ; NO-SIMD128-FAST-NEXT: i32.select $push7=, $4, $8, $pop6 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop9), $pop7 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %c = icmp ugt <4 x i32> %x, %y %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y @@ -10337,63 +8685,59 @@ define <4 x i32> @abs_v4i32(<4 x i32> %x) { ; NO-SIMD128-LABEL: abs_v4i32: ; NO-SIMD128: .functype abs_v4i32 (i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push3=, 12 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 ; NO-SIMD128-NEXT: i32.const $push0=, 31 -; NO-SIMD128-NEXT: i32.shr_s $push21=, $4, $pop0 -; NO-SIMD128-NEXT: local.tee $push20=, $5=, $pop21 -; NO-SIMD128-NEXT: i32.xor $push1=, $4, $pop20 +; NO-SIMD128-NEXT: i32.shr_s $push19=, $4, $pop0 +; NO-SIMD128-NEXT: local.tee $push18=, $5=, $pop19 +; NO-SIMD128-NEXT: i32.xor $push1=, $4, $pop18 ; NO-SIMD128-NEXT: i32.sub $push2=, $pop1, $5 -; NO-SIMD128-NEXT: i32.store 0($pop4), $pop2 -; NO-SIMD128-NEXT: i32.const $push19=, 31 -; NO-SIMD128-NEXT: i32.shr_s $push18=, $3, $pop19 -; NO-SIMD128-NEXT: local.tee $push17=, $4=, $pop18 -; NO-SIMD128-NEXT: i32.xor $push5=, $3, $pop17 +; NO-SIMD128-NEXT: i32.store 12($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push17=, 31 +; NO-SIMD128-NEXT: i32.shr_s $push16=, $3, $pop17 +; NO-SIMD128-NEXT: local.tee $push15=, $4=, $pop16 +; NO-SIMD128-NEXT: i32.xor $push3=, $3, $pop15 +; NO-SIMD128-NEXT: i32.sub $push4=, $pop3, $4 +; NO-SIMD128-NEXT: i32.store 8($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push14=, 31 +; NO-SIMD128-NEXT: i32.shr_s $push13=, $2, $pop14 +; NO-SIMD128-NEXT: local.tee $push12=, $4=, $pop13 +; NO-SIMD128-NEXT: i32.xor $push5=, $2, $pop12 ; NO-SIMD128-NEXT: i32.sub $push6=, $pop5, $4 -; NO-SIMD128-NEXT: i32.store 8($0), $pop6 -; NO-SIMD128-NEXT: i32.const 
$push16=, 31 -; NO-SIMD128-NEXT: i32.shr_s $push15=, $2, $pop16 -; NO-SIMD128-NEXT: local.tee $push14=, $4=, $pop15 -; NO-SIMD128-NEXT: i32.xor $push7=, $2, $pop14 +; NO-SIMD128-NEXT: i32.store 4($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push11=, 31 +; NO-SIMD128-NEXT: i32.shr_s $push10=, $1, $pop11 +; NO-SIMD128-NEXT: local.tee $push9=, $4=, $pop10 +; NO-SIMD128-NEXT: i32.xor $push7=, $1, $pop9 ; NO-SIMD128-NEXT: i32.sub $push8=, $pop7, $4 -; NO-SIMD128-NEXT: i32.store 4($0), $pop8 -; NO-SIMD128-NEXT: i32.const $push13=, 31 -; NO-SIMD128-NEXT: i32.shr_s $push12=, $1, $pop13 -; NO-SIMD128-NEXT: local.tee $push11=, $4=, $pop12 -; NO-SIMD128-NEXT: i32.xor $push9=, $1, $pop11 -; NO-SIMD128-NEXT: i32.sub $push10=, $pop9, $4 -; NO-SIMD128-NEXT: i32.store 0($0), $pop10 +; NO-SIMD128-NEXT: i32.store 0($0), $pop8 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: abs_v4i32: ; NO-SIMD128-FAST: .functype abs_v4i32 (i32, i32, i32, i32, i32) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 31 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push21=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: local.tee $push20=, $5=, $pop21 -; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop20 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push19=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: local.tee $push18=, $5=, $pop19 +; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop18 ; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop1, $5 ; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push19=, 31 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push18=, $2, $pop19 -; NO-SIMD128-FAST-NEXT: local.tee $push17=, $1=, $pop18 -; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $2, $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 31 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push16=, $2, $pop17 +; NO-SIMD128-FAST-NEXT: local.tee $push15=, $1=, $pop16 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $2, $pop15 ; NO-SIMD128-FAST-NEXT: i32.sub $push4=, $pop3, $1 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop4 -; 
NO-SIMD128-FAST-NEXT: i32.const $push16=, 31 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push15=, $3, $pop16 -; NO-SIMD128-FAST-NEXT: local.tee $push14=, $2=, $pop15 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $3, $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 31 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push13=, $3, $pop14 +; NO-SIMD128-FAST-NEXT: local.tee $push12=, $2=, $pop13 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $3, $pop12 ; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop5, $2 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 31 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push12=, $4, $pop13 -; NO-SIMD128-FAST-NEXT: local.tee $push11=, $0=, $pop12 -; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $4, $pop11 -; NO-SIMD128-FAST-NEXT: i32.sub $push8=, $pop7, $0 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop10), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push11=, 31 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push10=, $4, $pop11 +; NO-SIMD128-FAST-NEXT: local.tee $push9=, $3=, $pop10 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $4, $pop9 +; NO-SIMD128-FAST-NEXT: i32.sub $push8=, $pop7, $3 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop8 ; NO-SIMD128-FAST-NEXT: return %a = sub <4 x i32> zeroinitializer, %x %b = icmp slt <4 x i32> %x, zeroinitializer @@ -10418,19 +8762,17 @@ define <4 x i32> @neg_v4i32(<4 x i32> %x) { ; NO-SIMD128: .functype neg_v4i32 (i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 0 -; NO-SIMD128-NEXT: i32.sub $push1=, $pop0, $3 -; NO-SIMD128-NEXT: i32.store 8($0), $pop1 -; NO-SIMD128-NEXT: i32.const $push9=, 0 -; NO-SIMD128-NEXT: i32.sub $push2=, $pop9, $2 -; NO-SIMD128-NEXT: i32.store 4($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push8=, 0 -; NO-SIMD128-NEXT: i32.sub $push3=, $pop8, $1 -; NO-SIMD128-NEXT: i32.store 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 12 -; NO-SIMD128-NEXT: i32.add 
$push6=, $0, $pop5 +; NO-SIMD128-NEXT: i32.sub $push1=, $pop0, $4 +; NO-SIMD128-NEXT: i32.store 12($0), $pop1 ; NO-SIMD128-NEXT: i32.const $push7=, 0 -; NO-SIMD128-NEXT: i32.sub $push4=, $pop7, $4 -; NO-SIMD128-NEXT: i32.store 0($pop6), $pop4 +; NO-SIMD128-NEXT: i32.sub $push2=, $pop7, $3 +; NO-SIMD128-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push6=, 0 +; NO-SIMD128-NEXT: i32.sub $push3=, $pop6, $2 +; NO-SIMD128-NEXT: i32.store 4($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push5=, 0 +; NO-SIMD128-NEXT: i32.sub $push4=, $pop5, $1 +; NO-SIMD128-NEXT: i32.store 0($0), $pop4 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: neg_v4i32: @@ -10439,17 +8781,15 @@ define <4 x i32> @neg_v4i32(<4 x i32> %x) { ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 0 ; NO-SIMD128-FAST-NEXT: i32.sub $push1=, $pop0, $1 ; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop9, $2 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop7, $2 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop8, $3 +; NO-SIMD128-FAST-NEXT: i32.const $push6=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop6, $3 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop7, $4 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push5=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push4=, $pop5, $4 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop4 ; NO-SIMD128-FAST-NEXT: return %a = sub <4 x i32> <i32 0, i32 0, i32 0, i32 0>, %x ret <4 x i32> %a @@ -10471,16 +8811,14 @@ define <4 x i32> @shl_v4i32(<4 x i32> %v, i32 %x) { ; NO-SIMD128-LABEL: shl_v4i32: ; NO-SIMD128: .functype shl_v4i32 (i32, 
i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.shl $push0=, $3, $5 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.shl $push1=, $2, $5 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.shl $push2=, $1, $5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.shl $push3=, $4, $5 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.shl $push0=, $4, $5 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.shl $push1=, $3, $5 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.shl $push2=, $2, $5 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.shl $push3=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shl_v4i32: @@ -10492,10 +8830,8 @@ define <4 x i32> @shl_v4i32(<4 x i32> %v, i32 %x) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $3, $5 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $4, $5 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $4, $5 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %t = insertelement <4 x i32> undef, i32 %x, i32 0 %s = shufflevector <4 x i32> %t, <4 x i32> undef, @@ -10523,19 +8859,17 @@ define <4 x i32> @shl_const_v4i32(<4 x i32> %v) { ; NO-SIMD128: .functype shl_const_v4i32 (i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 5 -; NO-SIMD128-NEXT: i32.shl $push1=, $3, $pop0 -; NO-SIMD128-NEXT: i32.store 8($0), $pop1 -; NO-SIMD128-NEXT: i32.const $push9=, 5 -; NO-SIMD128-NEXT: i32.shl $push2=, $2, $pop9 -; NO-SIMD128-NEXT: i32.store 4($0), $pop2 -; 
NO-SIMD128-NEXT: i32.const $push8=, 5 -; NO-SIMD128-NEXT: i32.shl $push3=, $1, $pop8 -; NO-SIMD128-NEXT: i32.store 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 12 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: i32.shl $push1=, $4, $pop0 +; NO-SIMD128-NEXT: i32.store 12($0), $pop1 ; NO-SIMD128-NEXT: i32.const $push7=, 5 -; NO-SIMD128-NEXT: i32.shl $push4=, $4, $pop7 -; NO-SIMD128-NEXT: i32.store 0($pop6), $pop4 +; NO-SIMD128-NEXT: i32.shl $push2=, $3, $pop7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push6=, 5 +; NO-SIMD128-NEXT: i32.shl $push3=, $2, $pop6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push5=, 5 +; NO-SIMD128-NEXT: i32.shl $push4=, $1, $pop5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop4 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shl_const_v4i32: @@ -10544,17 +8878,15 @@ define <4 x i32> @shl_const_v4i32(<4 x i32> %v) { ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 5 ; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $1, $pop0 ; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop7 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push6=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop6 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $pop7 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push5=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $4, $pop5 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop4 ; NO-SIMD128-FAST-NEXT: return %a = 
shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5> ret <4 x i32> %a @@ -10606,16 +8938,14 @@ define <4 x i32> @shl_vec_v4i32(<4 x i32> %v, <4 x i32> %x) { ; NO-SIMD128-LABEL: shl_vec_v4i32: ; NO-SIMD128: .functype shl_vec_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.shl $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.shl $push1=, $2, $6 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.shl $push2=, $1, $5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.shl $push3=, $4, $8 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.shl $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.shl $push1=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.shl $push2=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.shl $push3=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shl_vec_v4i32: @@ -10627,10 +8957,8 @@ define <4 x i32> @shl_vec_v4i32(<4 x i32> %v, <4 x i32> %x) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = shl <4 x i32> %v, %x ret <4 x i32> %a @@ -10652,16 +8980,14 @@ define <4 x i32> @shr_s_v4i32(<4 x i32> %v, i32 %x) { ; NO-SIMD128-LABEL: shr_s_v4i32: ; NO-SIMD128: .functype shr_s_v4i32 (i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: 
i32.shr_s $push0=, $3, $5 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.shr_s $push1=, $2, $5 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.shr_s $push2=, $1, $5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.shr_s $push3=, $4, $5 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.shr_s $push0=, $4, $5 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.shr_s $push1=, $3, $5 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.shr_s $push2=, $2, $5 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.shr_s $push3=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_s_v4i32: @@ -10673,10 +8999,8 @@ define <4 x i32> @shr_s_v4i32(<4 x i32> %v, i32 %x) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $3, $5 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push5=, $4, $5 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push3=, $4, $5 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %t = insertelement <4 x i32> undef, i32 %x, i32 0 %s = shufflevector <4 x i32> %t, <4 x i32> undef, @@ -10731,16 +9055,14 @@ define <4 x i32> @shr_s_vec_v4i32(<4 x i32> %v, <4 x i32> %x) { ; NO-SIMD128-LABEL: shr_s_vec_v4i32: ; NO-SIMD128: .functype shr_s_vec_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.shr_s $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.shr_s $push1=, $2, $6 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.shr_s $push2=, $1, $5 -; NO-SIMD128-NEXT: 
i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.shr_s $push3=, $4, $8 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.shr_s $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.shr_s $push1=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.shr_s $push2=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.shr_s $push3=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_s_vec_v4i32: @@ -10752,10 +9074,8 @@ define <4 x i32> @shr_s_vec_v4i32(<4 x i32> %v, <4 x i32> %x) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = ashr <4 x i32> %v, %x ret <4 x i32> %a @@ -10777,16 +9097,14 @@ define <4 x i32> @shr_u_v4i32(<4 x i32> %v, i32 %x) { ; NO-SIMD128-LABEL: shr_u_v4i32: ; NO-SIMD128: .functype shr_u_v4i32 (i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.shr_u $push0=, $3, $5 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.shr_u $push1=, $2, $5 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.shr_u $push2=, $1, $5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.shr_u $push3=, $4, $5 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.shr_u $push0=, $4, $5 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; 
NO-SIMD128-NEXT: i32.shr_u $push1=, $3, $5 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.shr_u $push2=, $2, $5 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.shr_u $push3=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_u_v4i32: @@ -10798,10 +9116,8 @@ define <4 x i32> @shr_u_v4i32(<4 x i32> %v, i32 %x) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $3, $5 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $4, $5 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push3=, $4, $5 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %t = insertelement <4 x i32> undef, i32 %x, i32 0 %s = shufflevector <4 x i32> %t, <4 x i32> undef, @@ -10856,16 +9172,14 @@ define <4 x i32> @shr_u_vec_v4i32(<4 x i32> %v, <4 x i32> %x) { ; NO-SIMD128-LABEL: shr_u_vec_v4i32: ; NO-SIMD128: .functype shr_u_vec_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.shr_u $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.shr_u $push1=, $2, $6 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.shr_u $push2=, $1, $5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.shr_u $push3=, $4, $8 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.shr_u $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.shr_u $push1=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.shr_u $push2=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.shr_u $push3=, $1, $5 +; 
NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_u_vec_v4i32: @@ -10877,10 +9191,8 @@ define <4 x i32> @shr_u_vec_v4i32(<4 x i32> %v, <4 x i32> %x) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = lshr <4 x i32> %v, %x ret <4 x i32> %a @@ -10902,16 +9214,14 @@ define <4 x i32> @and_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: and_v4i32: ; NO-SIMD128: .functype and_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.and $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.and $push1=, $2, $6 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.and $push2=, $1, $5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.and $push3=, $4, $8 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.and $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.and $push1=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.and $push2=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.and $push3=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: and_v4i32: @@ -10923,10 +9233,8 @@ define <4 x i32> @and_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: 
i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = and <4 x i32> %x, %y ret <4 x i32> %a @@ -10948,16 +9256,14 @@ define <4 x i32> @or_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: or_v4i32: ; NO-SIMD128: .functype or_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.or $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.or $push1=, $2, $6 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.or $push2=, $1, $5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.or $push3=, $4, $8 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.or $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.or $push1=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.or $push2=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.or $push3=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: or_v4i32: @@ -10969,10 +9275,8 @@ define <4 x i32> @or_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.or $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.or $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.or $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; 
NO-SIMD128-FAST-NEXT: return %a = or <4 x i32> %x, %y ret <4 x i32> %a @@ -10994,16 +9298,14 @@ define <4 x i32> @xor_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: xor_v4i32: ; NO-SIMD128: .functype xor_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.xor $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.xor $push1=, $2, $6 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.xor $push2=, $1, $5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.xor $push3=, $4, $8 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.xor $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.xor $push1=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.xor $push2=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.xor $push3=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: xor_v4i32: @@ -11015,10 +9317,8 @@ define <4 x i32> @xor_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = xor <4 x i32> %x, %y ret <4 x i32> %a @@ -11041,19 +9341,17 @@ define <4 x i32> @not_v4i32(<4 x i32> %x) { ; NO-SIMD128: .functype not_v4i32 (i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, -1 -; NO-SIMD128-NEXT: i32.xor $push1=, $3, $pop0 
-; NO-SIMD128-NEXT: i32.store 8($0), $pop1 -; NO-SIMD128-NEXT: i32.const $push9=, -1 -; NO-SIMD128-NEXT: i32.xor $push2=, $2, $pop9 -; NO-SIMD128-NEXT: i32.store 4($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push8=, -1 -; NO-SIMD128-NEXT: i32.xor $push3=, $1, $pop8 -; NO-SIMD128-NEXT: i32.store 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 12 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: i32.xor $push1=, $4, $pop0 +; NO-SIMD128-NEXT: i32.store 12($0), $pop1 ; NO-SIMD128-NEXT: i32.const $push7=, -1 -; NO-SIMD128-NEXT: i32.xor $push4=, $4, $pop7 -; NO-SIMD128-NEXT: i32.store 0($pop6), $pop4 +; NO-SIMD128-NEXT: i32.xor $push2=, $3, $pop7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push6=, -1 +; NO-SIMD128-NEXT: i32.xor $push3=, $2, $pop6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push5=, -1 +; NO-SIMD128-NEXT: i32.xor $push4=, $1, $pop5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop4 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: not_v4i32: @@ -11062,17 +9360,15 @@ define <4 x i32> @not_v4i32(<4 x i32> %x) { ; NO-SIMD128-FAST-NEXT: i32.const $push0=, -1 ; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop0 ; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop7 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push6=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop6 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $4, $pop7 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop5), $pop6 +; 
NO-SIMD128-FAST-NEXT: i32.const $push5=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $4, $pop5 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop4 ; NO-SIMD128-FAST-NEXT: return %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ret <4 x i32> %a @@ -11096,23 +9392,21 @@ define <4 x i32> @andnot_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128: .functype andnot_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, -1 -; NO-SIMD128-NEXT: i32.xor $push1=, $7, $pop0 -; NO-SIMD128-NEXT: i32.and $push2=, $3, $pop1 -; NO-SIMD128-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push13=, -1 -; NO-SIMD128-NEXT: i32.xor $push3=, $6, $pop13 -; NO-SIMD128-NEXT: i32.and $push4=, $2, $pop3 -; NO-SIMD128-NEXT: i32.store 4($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push12=, -1 -; NO-SIMD128-NEXT: i32.xor $push5=, $5, $pop12 -; NO-SIMD128-NEXT: i32.and $push6=, $1, $pop5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push9=, 12 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: i32.xor $push1=, $8, $pop0 +; NO-SIMD128-NEXT: i32.and $push2=, $4, $pop1 +; NO-SIMD128-NEXT: i32.store 12($0), $pop2 ; NO-SIMD128-NEXT: i32.const $push11=, -1 -; NO-SIMD128-NEXT: i32.xor $push7=, $8, $pop11 -; NO-SIMD128-NEXT: i32.and $push8=, $4, $pop7 -; NO-SIMD128-NEXT: i32.store 0($pop10), $pop8 +; NO-SIMD128-NEXT: i32.xor $push3=, $7, $pop11 +; NO-SIMD128-NEXT: i32.and $push4=, $3, $pop3 +; NO-SIMD128-NEXT: i32.store 8($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push10=, -1 +; NO-SIMD128-NEXT: i32.xor $push5=, $6, $pop10 +; NO-SIMD128-NEXT: i32.and $push6=, $2, $pop5 +; NO-SIMD128-NEXT: i32.store 4($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push9=, -1 +; NO-SIMD128-NEXT: i32.xor $push7=, $5, $pop9 +; NO-SIMD128-NEXT: i32.and $push8=, $1, $pop7 +; NO-SIMD128-NEXT: i32.store 0($0), $pop8 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: andnot_v4i32: @@ -11122,20 +9416,18 @@ 
define <4 x i32> @andnot_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $5, $pop0 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $6, $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push11=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $6, $pop11 ; NO-SIMD128-FAST-NEXT: i32.and $push4=, $2, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push12=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $7, $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $7, $pop10 ; NO-SIMD128-FAST-NEXT: i32.and $push6=, $3, $pop5 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push11=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $8, $pop11 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $4, $pop9 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop8), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $8, $pop9 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $4, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop8 ; NO-SIMD128-FAST-NEXT: return %inv_y = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> %a = and <4 x i32> %x, %inv_y @@ -11161,32 +9453,30 @@ define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) { ; NO-SIMD128-LABEL: bitselect_v4i32: ; NO-SIMD128: .functype bitselect_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 12 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.const $push1=, -1 ; NO-SIMD128-NEXT: i32.xor $push2=, $4, $pop1 ; NO-SIMD128-NEXT: i32.and $push3=, $pop2, $12 ; NO-SIMD128-NEXT: i32.and $push0=, $4, $8 ; 
NO-SIMD128-NEXT: i32.or $push4=, $pop3, $pop0 -; NO-SIMD128-NEXT: i32.store 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push21=, -1 -; NO-SIMD128-NEXT: i32.xor $push8=, $3, $pop21 -; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $11 -; NO-SIMD128-NEXT: i32.and $push7=, $3, $7 -; NO-SIMD128-NEXT: i32.or $push10=, $pop9, $pop7 -; NO-SIMD128-NEXT: i32.store 8($0), $pop10 -; NO-SIMD128-NEXT: i32.const $push20=, -1 -; NO-SIMD128-NEXT: i32.xor $push12=, $2, $pop20 -; NO-SIMD128-NEXT: i32.and $push13=, $pop12, $10 -; NO-SIMD128-NEXT: i32.and $push11=, $2, $6 -; NO-SIMD128-NEXT: i32.or $push14=, $pop13, $pop11 -; NO-SIMD128-NEXT: i32.store 4($0), $pop14 +; NO-SIMD128-NEXT: i32.store 12($0), $pop4 ; NO-SIMD128-NEXT: i32.const $push19=, -1 -; NO-SIMD128-NEXT: i32.xor $push16=, $1, $pop19 -; NO-SIMD128-NEXT: i32.and $push17=, $pop16, $9 -; NO-SIMD128-NEXT: i32.and $push15=, $1, $5 -; NO-SIMD128-NEXT: i32.or $push18=, $pop17, $pop15 -; NO-SIMD128-NEXT: i32.store 0($0), $pop18 +; NO-SIMD128-NEXT: i32.xor $push6=, $3, $pop19 +; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $11 +; NO-SIMD128-NEXT: i32.and $push5=, $3, $7 +; NO-SIMD128-NEXT: i32.or $push8=, $pop7, $pop5 +; NO-SIMD128-NEXT: i32.store 8($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push18=, -1 +; NO-SIMD128-NEXT: i32.xor $push10=, $2, $pop18 +; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $10 +; NO-SIMD128-NEXT: i32.and $push9=, $2, $6 +; NO-SIMD128-NEXT: i32.or $push12=, $pop11, $pop9 +; NO-SIMD128-NEXT: i32.store 4($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push17=, -1 +; NO-SIMD128-NEXT: i32.xor $push14=, $1, $pop17 +; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $9 +; NO-SIMD128-NEXT: i32.and $push13=, $1, $5 +; NO-SIMD128-NEXT: i32.or $push16=, $pop15, $pop13 +; NO-SIMD128-NEXT: i32.store 0($0), $pop16 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: bitselect_v4i32: @@ -11198,26 +9488,24 @@ define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) { ; NO-SIMD128-FAST-NEXT: i32.and $push0=, $1, $5 ; 
NO-SIMD128-FAST-NEXT: i32.or $push4=, $pop3, $pop0 ; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop19 ; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $10 ; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $6 ; NO-SIMD128-FAST-NEXT: i32.or $push8=, $pop7, $pop5 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop18 ; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $11 ; NO-SIMD128-FAST-NEXT: i32.and $push9=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.or $push12=, $pop11, $pop9 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop19 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop17 ; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $12 ; NO-SIMD128-FAST-NEXT: i32.and $push13=, $4, $8 ; NO-SIMD128-FAST-NEXT: i32.or $push16=, $pop15, $pop13 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop18), $pop16 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop16 ; NO-SIMD128-FAST-NEXT: return %masked_v1 = and <4 x i32> %c, %v1 %inv_mask = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %c @@ -11244,24 +9532,22 @@ define <4 x i32> @bitselect_xor_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2 ; NO-SIMD128-LABEL: bitselect_xor_v4i32: ; NO-SIMD128: .functype bitselect_xor_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push3=, 12 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 ; 
NO-SIMD128-NEXT: i32.xor $push0=, $8, $12 ; NO-SIMD128-NEXT: i32.and $push1=, $pop0, $4 ; NO-SIMD128-NEXT: i32.xor $push2=, $pop1, $12 -; NO-SIMD128-NEXT: i32.store 0($pop4), $pop2 -; NO-SIMD128-NEXT: i32.xor $push5=, $7, $11 -; NO-SIMD128-NEXT: i32.and $push6=, $pop5, $3 -; NO-SIMD128-NEXT: i32.xor $push7=, $pop6, $11 -; NO-SIMD128-NEXT: i32.store 8($0), $pop7 -; NO-SIMD128-NEXT: i32.xor $push8=, $6, $10 -; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $2 -; NO-SIMD128-NEXT: i32.xor $push10=, $pop9, $10 -; NO-SIMD128-NEXT: i32.store 4($0), $pop10 -; NO-SIMD128-NEXT: i32.xor $push11=, $5, $9 -; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $1 -; NO-SIMD128-NEXT: i32.xor $push13=, $pop12, $9 -; NO-SIMD128-NEXT: i32.store 0($0), $pop13 +; NO-SIMD128-NEXT: i32.store 12($0), $pop2 +; NO-SIMD128-NEXT: i32.xor $push3=, $7, $11 +; NO-SIMD128-NEXT: i32.and $push4=, $pop3, $3 +; NO-SIMD128-NEXT: i32.xor $push5=, $pop4, $11 +; NO-SIMD128-NEXT: i32.store 8($0), $pop5 +; NO-SIMD128-NEXT: i32.xor $push6=, $6, $10 +; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $2 +; NO-SIMD128-NEXT: i32.xor $push8=, $pop7, $10 +; NO-SIMD128-NEXT: i32.store 4($0), $pop8 +; NO-SIMD128-NEXT: i32.xor $push9=, $5, $9 +; NO-SIMD128-NEXT: i32.and $push10=, $pop9, $1 +; NO-SIMD128-NEXT: i32.xor $push11=, $pop10, $9 +; NO-SIMD128-NEXT: i32.store 0($0), $pop11 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: bitselect_xor_v4i32: @@ -11279,12 +9565,10 @@ define <4 x i32> @bitselect_xor_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2 ; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $3 ; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $11 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $8, $12 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $4 -; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $pop12, $12 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop10), $pop13 +; NO-SIMD128-FAST-NEXT: 
i32.xor $push9=, $8, $12 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $pop9, $4 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $pop10, $12 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop11 ; NO-SIMD128-FAST-NEXT: return %xor1 = xor <4 x i32> %v1, %v2 %and = and <4 x i32> %xor1, %c @@ -11311,32 +9595,30 @@ define <4 x i32> @bitselect_xor_reversed_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x ; NO-SIMD128-LABEL: bitselect_xor_reversed_v4i32: ; NO-SIMD128: .functype bitselect_xor_reversed_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 12 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.xor $push2=, $8, $12 ; NO-SIMD128-NEXT: i32.const $push0=, -1 ; NO-SIMD128-NEXT: i32.xor $push1=, $4, $pop0 ; NO-SIMD128-NEXT: i32.and $push3=, $pop2, $pop1 ; NO-SIMD128-NEXT: i32.xor $push4=, $pop3, $12 -; NO-SIMD128-NEXT: i32.store 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.xor $push8=, $7, $11 -; NO-SIMD128-NEXT: i32.const $push21=, -1 -; NO-SIMD128-NEXT: i32.xor $push7=, $3, $pop21 -; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.xor $push10=, $pop9, $11 -; NO-SIMD128-NEXT: i32.store 8($0), $pop10 -; NO-SIMD128-NEXT: i32.xor $push12=, $6, $10 -; NO-SIMD128-NEXT: i32.const $push20=, -1 -; NO-SIMD128-NEXT: i32.xor $push11=, $2, $pop20 -; NO-SIMD128-NEXT: i32.and $push13=, $pop12, $pop11 -; NO-SIMD128-NEXT: i32.xor $push14=, $pop13, $10 -; NO-SIMD128-NEXT: i32.store 4($0), $pop14 -; NO-SIMD128-NEXT: i32.xor $push16=, $5, $9 +; NO-SIMD128-NEXT: i32.store 12($0), $pop4 +; NO-SIMD128-NEXT: i32.xor $push6=, $7, $11 ; NO-SIMD128-NEXT: i32.const $push19=, -1 -; NO-SIMD128-NEXT: i32.xor $push15=, $1, $pop19 -; NO-SIMD128-NEXT: i32.and $push17=, $pop16, $pop15 -; NO-SIMD128-NEXT: i32.xor $push18=, $pop17, $9 -; NO-SIMD128-NEXT: i32.store 0($0), $pop18 +; NO-SIMD128-NEXT: i32.xor $push5=, $3, $pop19 +; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $pop5 +; NO-SIMD128-NEXT: 
i32.xor $push8=, $pop7, $11 +; NO-SIMD128-NEXT: i32.store 8($0), $pop8 +; NO-SIMD128-NEXT: i32.xor $push10=, $6, $10 +; NO-SIMD128-NEXT: i32.const $push18=, -1 +; NO-SIMD128-NEXT: i32.xor $push9=, $2, $pop18 +; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $pop9 +; NO-SIMD128-NEXT: i32.xor $push12=, $pop11, $10 +; NO-SIMD128-NEXT: i32.store 4($0), $pop12 +; NO-SIMD128-NEXT: i32.xor $push14=, $5, $9 +; NO-SIMD128-NEXT: i32.const $push17=, -1 +; NO-SIMD128-NEXT: i32.xor $push13=, $1, $pop17 +; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.xor $push16=, $pop15, $9 +; NO-SIMD128-NEXT: i32.store 0($0), $pop16 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: bitselect_xor_reversed_v4i32: @@ -11349,25 +9631,23 @@ define <4 x i32> @bitselect_xor_reversed_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x ; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $pop3, $9 ; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop4 ; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $6, $10 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop19 ; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $pop5 ; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $10 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop8 ; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $7, $11 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $3, $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $3, $pop18 ; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $pop9 ; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $pop11, $11 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 ; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $8, $12 -; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $4, $pop19 +; 
NO-SIMD128-FAST-NEXT: i32.const $push17=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $4, $pop17 ; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $pop13 ; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $pop15, $12 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop18), $pop16 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop16 ; NO-SIMD128-FAST-NEXT: return %xor1 = xor <4 x i32> %v1, %v2 %notc = xor <4 x i32> %c, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -11394,24 +9674,22 @@ define <4 x i32> @extmul_low_s_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128-LABEL: extmul_low_s_v4i32: ; NO-SIMD128: .functype extmul_low_s_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.extend16_s $push1=, $3 -; NO-SIMD128-NEXT: i32.extend16_s $push0=, $11 +; NO-SIMD128-NEXT: i32.extend16_s $push1=, $4 +; NO-SIMD128-NEXT: i32.extend16_s $push0=, $12 ; NO-SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0 -; NO-SIMD128-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-NEXT: i32.extend16_s $push4=, $2 -; NO-SIMD128-NEXT: i32.extend16_s $push3=, $10 +; NO-SIMD128-NEXT: i32.store 12($0), $pop2 +; NO-SIMD128-NEXT: i32.extend16_s $push4=, $3 +; NO-SIMD128-NEXT: i32.extend16_s $push3=, $11 ; NO-SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3 -; NO-SIMD128-NEXT: i32.store 4($0), $pop5 -; NO-SIMD128-NEXT: i32.extend16_s $push7=, $1 -; NO-SIMD128-NEXT: i32.extend16_s $push6=, $9 +; NO-SIMD128-NEXT: i32.store 8($0), $pop5 +; NO-SIMD128-NEXT: i32.extend16_s $push7=, $2 +; NO-SIMD128-NEXT: i32.extend16_s $push6=, $10 ; NO-SIMD128-NEXT: i32.mul $push8=, $pop7, $pop6 -; NO-SIMD128-NEXT: i32.store 0($0), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 12 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.extend16_s $push10=, $4 -; NO-SIMD128-NEXT: i32.extend16_s $push9=, $12 +; NO-SIMD128-NEXT: i32.store 4($0), $pop8 +; NO-SIMD128-NEXT: i32.extend16_s $push10=, $1 +; NO-SIMD128-NEXT: i32.extend16_s $push9=, $9 ; 
NO-SIMD128-NEXT: i32.mul $push11=, $pop10, $pop9 -; NO-SIMD128-NEXT: i32.store 0($pop13), $pop11 +; NO-SIMD128-NEXT: i32.store 0($0), $pop11 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: extmul_low_s_v4i32: @@ -11429,12 +9707,10 @@ define <4 x i32> @extmul_low_s_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push6=, $11 ; NO-SIMD128-FAST-NEXT: i32.mul $push8=, $pop7, $pop6 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push12=, $4 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push11=, $12 -; NO-SIMD128-FAST-NEXT: i32.mul $push13=, $pop12, $pop11 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop10), $pop13 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push10=, $4 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push9=, $12 +; NO-SIMD128-FAST-NEXT: i32.mul $push11=, $pop10, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop11 ; NO-SIMD128-FAST-NEXT: return %low1 = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -11464,24 +9740,22 @@ define <4 x i32> @extmul_high_s_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128-LABEL: extmul_high_s_v4i32: ; NO-SIMD128: .functype extmul_high_s_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.extend16_s $push1=, $7 -; NO-SIMD128-NEXT: i32.extend16_s $push0=, $15 +; NO-SIMD128-NEXT: i32.extend16_s $push1=, $8 +; NO-SIMD128-NEXT: i32.extend16_s $push0=, $16 ; NO-SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0 -; NO-SIMD128-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-NEXT: i32.extend16_s $push4=, $6 -; NO-SIMD128-NEXT: i32.extend16_s $push3=, $14 +; NO-SIMD128-NEXT: i32.store 12($0), $pop2 +; NO-SIMD128-NEXT: i32.extend16_s $push4=, $7 +; NO-SIMD128-NEXT: i32.extend16_s $push3=, $15 ; NO-SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3 -; 
NO-SIMD128-NEXT: i32.store 4($0), $pop5 -; NO-SIMD128-NEXT: i32.extend16_s $push7=, $5 -; NO-SIMD128-NEXT: i32.extend16_s $push6=, $13 +; NO-SIMD128-NEXT: i32.store 8($0), $pop5 +; NO-SIMD128-NEXT: i32.extend16_s $push7=, $6 +; NO-SIMD128-NEXT: i32.extend16_s $push6=, $14 ; NO-SIMD128-NEXT: i32.mul $push8=, $pop7, $pop6 -; NO-SIMD128-NEXT: i32.store 0($0), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 12 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.extend16_s $push10=, $8 -; NO-SIMD128-NEXT: i32.extend16_s $push9=, $16 +; NO-SIMD128-NEXT: i32.store 4($0), $pop8 +; NO-SIMD128-NEXT: i32.extend16_s $push10=, $5 +; NO-SIMD128-NEXT: i32.extend16_s $push9=, $13 ; NO-SIMD128-NEXT: i32.mul $push11=, $pop10, $pop9 -; NO-SIMD128-NEXT: i32.store 0($pop13), $pop11 +; NO-SIMD128-NEXT: i32.store 0($0), $pop11 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: extmul_high_s_v4i32: @@ -11499,12 +9773,10 @@ define <4 x i32> @extmul_high_s_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push6=, $15 ; NO-SIMD128-FAST-NEXT: i32.mul $push8=, $pop7, $pop6 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push12=, $8 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push11=, $16 -; NO-SIMD128-FAST-NEXT: i32.mul $push13=, $pop12, $pop11 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop10), $pop13 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push10=, $8 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push9=, $16 +; NO-SIMD128-FAST-NEXT: i32.mul $push11=, $pop10, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop11 ; NO-SIMD128-FAST-NEXT: return %high1 = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> @@ -11535,31 +9807,29 @@ define <4 x i32> @extmul_low_u_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128: .functype extmul_low_u_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 65535 -; NO-SIMD128-NEXT: i32.and $push2=, $3, $pop0 -; NO-SIMD128-NEXT: i32.const $push21=, 65535 -; NO-SIMD128-NEXT: i32.and $push1=, $11, $pop21 -; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1 -; NO-SIMD128-NEXT: i32.store 8($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push20=, 65535 -; NO-SIMD128-NEXT: i32.and $push5=, $2, $pop20 +; NO-SIMD128-NEXT: i32.and $push2=, $4, $pop0 ; NO-SIMD128-NEXT: i32.const $push19=, 65535 -; NO-SIMD128-NEXT: i32.and $push4=, $10, $pop19 -; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4 -; NO-SIMD128-NEXT: i32.store 4($0), $pop6 +; NO-SIMD128-NEXT: i32.and $push1=, $12, $pop19 +; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-NEXT: i32.const $push18=, 65535 -; NO-SIMD128-NEXT: i32.and $push8=, $1, $pop18 +; NO-SIMD128-NEXT: i32.and $push5=, $3, $pop18 ; NO-SIMD128-NEXT: i32.const $push17=, 65535 -; NO-SIMD128-NEXT: i32.and $push7=, $9, $pop17 -; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.store 0($0), $pop9 -; NO-SIMD128-NEXT: i32.const $push13=, 12 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-NEXT: i32.and $push4=, $11, $pop17 +; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.store 8($0), $pop6 ; NO-SIMD128-NEXT: i32.const $push16=, 65535 -; NO-SIMD128-NEXT: i32.and $push11=, $4, $pop16 +; NO-SIMD128-NEXT: i32.and $push8=, $2, $pop16 ; NO-SIMD128-NEXT: i32.const $push15=, 65535 -; NO-SIMD128-NEXT: i32.and $push10=, $12, $pop15 +; NO-SIMD128-NEXT: i32.and $push7=, $10, $pop15 +; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.store 4($0), $pop9 +; NO-SIMD128-NEXT: i32.const $push14=, 65535 +; NO-SIMD128-NEXT: i32.and $push11=, $1, $pop14 +; NO-SIMD128-NEXT: i32.const $push13=, 65535 +; NO-SIMD128-NEXT: i32.and $push10=, $9, $pop13 ; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, 
$pop10 -; NO-SIMD128-NEXT: i32.store 0($pop14), $pop12 +; NO-SIMD128-NEXT: i32.store 0($0), $pop12 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: extmul_low_u_v4i32: @@ -11567,30 +9837,28 @@ define <4 x i32> @extmul_low_u_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop19 ; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push19=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop19 -; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 -; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.const $push18=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop18 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop18 ; NO-SIMD128-FAST-NEXT: i32.const $push17=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push7=, $11, $pop17 -; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7 -; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop17 +; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.const $push16=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop16 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop16 ; NO-SIMD128-FAST-NEXT: i32.const $push15=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $12, $pop15 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $11, $pop15 +; NO-SIMD128-FAST-NEXT: i32.mul $push9=, 
$pop8, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $12, $pop13 ; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $pop11, $pop10 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop14), $pop12 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop12 ; NO-SIMD128-FAST-NEXT: return %low1 = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -11621,31 +9889,29 @@ define <4 x i32> @extmul_high_u_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128: .functype extmul_high_u_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 65535 -; NO-SIMD128-NEXT: i32.and $push2=, $7, $pop0 -; NO-SIMD128-NEXT: i32.const $push21=, 65535 -; NO-SIMD128-NEXT: i32.and $push1=, $15, $pop21 -; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1 -; NO-SIMD128-NEXT: i32.store 8($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push20=, 65535 -; NO-SIMD128-NEXT: i32.and $push5=, $6, $pop20 +; NO-SIMD128-NEXT: i32.and $push2=, $8, $pop0 ; NO-SIMD128-NEXT: i32.const $push19=, 65535 -; NO-SIMD128-NEXT: i32.and $push4=, $14, $pop19 -; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4 -; NO-SIMD128-NEXT: i32.store 4($0), $pop6 +; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop19 +; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-NEXT: i32.const $push18=, 65535 -; NO-SIMD128-NEXT: i32.and $push8=, $5, $pop18 +; NO-SIMD128-NEXT: i32.and $push5=, $7, $pop18 ; NO-SIMD128-NEXT: i32.const $push17=, 65535 -; NO-SIMD128-NEXT: i32.and $push7=, $13, $pop17 -; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.store 0($0), $pop9 -; NO-SIMD128-NEXT: i32.const $push13=, 12 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 +; 
NO-SIMD128-NEXT: i32.and $push4=, $15, $pop17 +; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.store 8($0), $pop6 ; NO-SIMD128-NEXT: i32.const $push16=, 65535 -; NO-SIMD128-NEXT: i32.and $push11=, $8, $pop16 +; NO-SIMD128-NEXT: i32.and $push8=, $6, $pop16 ; NO-SIMD128-NEXT: i32.const $push15=, 65535 -; NO-SIMD128-NEXT: i32.and $push10=, $16, $pop15 +; NO-SIMD128-NEXT: i32.and $push7=, $14, $pop15 +; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.store 4($0), $pop9 +; NO-SIMD128-NEXT: i32.const $push14=, 65535 +; NO-SIMD128-NEXT: i32.and $push11=, $5, $pop14 +; NO-SIMD128-NEXT: i32.const $push13=, 65535 +; NO-SIMD128-NEXT: i32.and $push10=, $13, $pop13 ; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10 -; NO-SIMD128-NEXT: i32.store 0($pop14), $pop12 +; NO-SIMD128-NEXT: i32.store 0($0), $pop12 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: extmul_high_u_v4i32: @@ -11653,30 +9919,28 @@ define <4 x i32> @extmul_high_u_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $5, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push1=, $13, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $13, $pop19 ; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $6, $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push19=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push4=, $14, $pop19 -; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 -; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.const $push18=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push8=, $7, $pop18 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $6, $pop18 ; NO-SIMD128-FAST-NEXT: i32.const $push17=, 65535 -; 
NO-SIMD128-FAST-NEXT: i32.and $push7=, $15, $pop17 -; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7 -; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $14, $pop17 +; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.const $push16=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push11=, $8, $pop16 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $7, $pop16 ; NO-SIMD128-FAST-NEXT: i32.const $push15=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $16, $pop15 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $15, $pop15 +; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $8, $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $16, $pop13 ; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $pop11, $pop10 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop14), $pop12 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop12 ; NO-SIMD128-FAST-NEXT: return %high1 = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> @@ -13061,16 +11325,14 @@ define <4 x float> @neg_v4f32(<4 x float> %x) { ; NO-SIMD128-LABEL: neg_v4f32: ; NO-SIMD128: .functype neg_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.neg $push0=, $3 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: f32.neg $push1=, $2 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: f32.neg $push2=, $1 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push3=, 12 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-NEXT: f32.neg $push5=, $4 -; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-NEXT: f32.neg $push0=, $4 +; 
NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.neg $push1=, $3 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.neg $push2=, $2 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.neg $push3=, $1 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: neg_v4f32: @@ -13082,10 +11344,8 @@ define <4 x float> @neg_v4f32(<4 x float> %x) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: f32.neg $push2=, $3 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: f32.neg $push5=, $4 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: f32.neg $push3=, $4 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = fsub nsz <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, %x ret <4 x float> %a @@ -13108,16 +11368,14 @@ define <4 x float> @abs_v4f32(<4 x float> %x) { ; NO-SIMD128-LABEL: abs_v4f32: ; NO-SIMD128: .functype abs_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.abs $push0=, $3 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: f32.abs $push1=, $2 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: f32.abs $push2=, $1 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push3=, 12 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-NEXT: f32.abs $push5=, $4 -; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-NEXT: f32.abs $push0=, $4 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.abs $push1=, $3 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.abs $push2=, $2 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.abs $push3=, $1 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: abs_v4f32: 
@@ -13129,10 +11387,8 @@ define <4 x float> @abs_v4f32(<4 x float> %x) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: f32.abs $push2=, $3 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: f32.abs $push5=, $4 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: f32.abs $push3=, $4 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x) ret <4 x float> %a @@ -13157,54 +11413,50 @@ define <4 x float> @min_unordered_v4f32(<4 x float> %x) { ; NO-SIMD128: .functype min_unordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.const $push17=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.gt $push1=, $3, $pop17 -; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $3, $pop1 -; NO-SIMD128-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-NEXT: f32.const $push16=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push15=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.gt $push3=, $2, $pop15 -; NO-SIMD128-NEXT: f32.select $push4=, $pop16, $2, $pop3 -; NO-SIMD128-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-NEXT: f32.gt $push1=, $4, $pop15 +; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $4, $pop1 +; NO-SIMD128-NEXT: f32.store 12($0), $pop2 ; NO-SIMD128-NEXT: f32.const $push14=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push13=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.gt $push5=, $1, $pop13 -; NO-SIMD128-NEXT: f32.select $push6=, $pop14, $1, $pop5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push9=, 12 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: f32.gt $push3=, $3, $pop13 +; NO-SIMD128-NEXT: f32.select $push4=, $pop14, $3, $pop3 +; NO-SIMD128-NEXT: f32.store 8($0), $pop4 ; NO-SIMD128-NEXT: f32.const $push12=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push11=, 0x1.4p2 
-; NO-SIMD128-NEXT: f32.gt $push7=, $4, $pop11 -; NO-SIMD128-NEXT: f32.select $push8=, $pop12, $4, $pop7 -; NO-SIMD128-NEXT: f32.store 0($pop10), $pop8 +; NO-SIMD128-NEXT: f32.gt $push5=, $2, $pop11 +; NO-SIMD128-NEXT: f32.select $push6=, $pop12, $2, $pop5 +; NO-SIMD128-NEXT: f32.store 4($0), $pop6 +; NO-SIMD128-NEXT: f32.const $push10=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.gt $push7=, $1, $pop9 +; NO-SIMD128-NEXT: f32.select $push8=, $pop10, $1, $pop7 +; NO-SIMD128-NEXT: f32.store 0($0), $pop8 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: min_unordered_v4f32: ; NO-SIMD128-FAST: .functype min_unordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.const $push17=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.gt $push1=, $1, $pop17 +; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.gt $push1=, $1, $pop15 ; NO-SIMD128-FAST-NEXT: f32.select $push2=, $pop0, $1, $pop1 ; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: f32.const $push16=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.gt $push3=, $2, $pop15 -; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop16, $2, $pop3 -; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 ; NO-SIMD128-FAST-NEXT: f32.const $push14=, 0x1.4p2 ; NO-SIMD128-FAST-NEXT: f32.const $push13=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.gt $push5=, $3, $pop13 -; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop14, $3, $pop5 -; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: f32.gt $push3=, $2, $pop13 +; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop14, $2, $pop3 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 ; NO-SIMD128-FAST-NEXT: f32.const $push12=, 0x1.4p2 ; NO-SIMD128-FAST-NEXT: f32.const $push11=, 
0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.gt $push7=, $4, $pop11 -; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop12, $4, $pop7 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop10), $pop8 +; NO-SIMD128-FAST-NEXT: f32.gt $push5=, $3, $pop11 +; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop12, $3, $pop5 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: f32.const $push10=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.gt $push7=, $4, $pop9 +; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop10, $4, $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop8 ; NO-SIMD128-FAST-NEXT: return %cmps = fcmp ule <4 x float> %x, <float 5., float 5., float 5., float 5.> %a = select <4 x i1> %cmps, <4 x float> %x, @@ -13231,54 +11483,50 @@ define <4 x float> @max_unordered_v4f32(<4 x float> %x) { ; NO-SIMD128: .functype max_unordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.const $push17=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.lt $push1=, $3, $pop17 -; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $3, $pop1 -; NO-SIMD128-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-NEXT: f32.const $push16=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push15=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.lt $push3=, $2, $pop15 -; NO-SIMD128-NEXT: f32.select $push4=, $pop16, $2, $pop3 -; NO-SIMD128-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-NEXT: f32.lt $push1=, $4, $pop15 +; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $4, $pop1 +; NO-SIMD128-NEXT: f32.store 12($0), $pop2 ; NO-SIMD128-NEXT: f32.const $push14=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push13=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.lt $push5=, $1, $pop13 -; NO-SIMD128-NEXT: f32.select $push6=, $pop14, $1, $pop5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push9=, 12 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: f32.lt $push3=, $3, $pop13 +; NO-SIMD128-NEXT: f32.select $push4=, 
$pop14, $3, $pop3 +; NO-SIMD128-NEXT: f32.store 8($0), $pop4 ; NO-SIMD128-NEXT: f32.const $push12=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push11=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.lt $push7=, $4, $pop11 -; NO-SIMD128-NEXT: f32.select $push8=, $pop12, $4, $pop7 -; NO-SIMD128-NEXT: f32.store 0($pop10), $pop8 +; NO-SIMD128-NEXT: f32.lt $push5=, $2, $pop11 +; NO-SIMD128-NEXT: f32.select $push6=, $pop12, $2, $pop5 +; NO-SIMD128-NEXT: f32.store 4($0), $pop6 +; NO-SIMD128-NEXT: f32.const $push10=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.lt $push7=, $1, $pop9 +; NO-SIMD128-NEXT: f32.select $push8=, $pop10, $1, $pop7 +; NO-SIMD128-NEXT: f32.store 0($0), $pop8 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_unordered_v4f32: ; NO-SIMD128-FAST: .functype max_unordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.const $push17=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.lt $push1=, $1, $pop17 +; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.lt $push1=, $1, $pop15 ; NO-SIMD128-FAST-NEXT: f32.select $push2=, $pop0, $1, $pop1 ; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: f32.const $push16=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.lt $push3=, $2, $pop15 -; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop16, $2, $pop3 -; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 ; NO-SIMD128-FAST-NEXT: f32.const $push14=, 0x1.4p2 ; NO-SIMD128-FAST-NEXT: f32.const $push13=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.lt $push5=, $3, $pop13 -; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop14, $3, $pop5 -; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: f32.lt $push3=, $2, $pop13 +; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop14, $2, 
$pop3 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 ; NO-SIMD128-FAST-NEXT: f32.const $push12=, 0x1.4p2 ; NO-SIMD128-FAST-NEXT: f32.const $push11=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.lt $push7=, $4, $pop11 -; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop12, $4, $pop7 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop10), $pop8 +; NO-SIMD128-FAST-NEXT: f32.lt $push5=, $3, $pop11 +; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop12, $3, $pop5 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: f32.const $push10=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.lt $push7=, $4, $pop9 +; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop10, $4, $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop8 ; NO-SIMD128-FAST-NEXT: return %cmps = fcmp uge <4 x float> %x, <float 5., float 5., float 5., float 5.> %a = select <4 x i1> %cmps, <4 x float> %x, @@ -13305,54 +11553,50 @@ define <4 x float> @min_ordered_v4f32(<4 x float> %x) { ; NO-SIMD128: .functype min_ordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.const $push17=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.ge $push1=, $3, $pop17 -; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $3, $pop1 -; NO-SIMD128-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-NEXT: f32.const $push16=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push15=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.ge $push3=, $2, $pop15 -; NO-SIMD128-NEXT: f32.select $push4=, $pop16, $2, $pop3 -; NO-SIMD128-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-NEXT: f32.ge $push1=, $4, $pop15 +; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $4, $pop1 +; NO-SIMD128-NEXT: f32.store 12($0), $pop2 ; NO-SIMD128-NEXT: f32.const $push14=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push13=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.ge $push5=, $1, $pop13 -; NO-SIMD128-NEXT: f32.select $push6=, $pop14, $1, $pop5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop6 -; NO-SIMD128-NEXT: i32.const 
$push9=, 12 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: f32.ge $push3=, $3, $pop13 +; NO-SIMD128-NEXT: f32.select $push4=, $pop14, $3, $pop3 +; NO-SIMD128-NEXT: f32.store 8($0), $pop4 ; NO-SIMD128-NEXT: f32.const $push12=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push11=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.ge $push7=, $4, $pop11 -; NO-SIMD128-NEXT: f32.select $push8=, $pop12, $4, $pop7 -; NO-SIMD128-NEXT: f32.store 0($pop10), $pop8 +; NO-SIMD128-NEXT: f32.ge $push5=, $2, $pop11 +; NO-SIMD128-NEXT: f32.select $push6=, $pop12, $2, $pop5 +; NO-SIMD128-NEXT: f32.store 4($0), $pop6 +; NO-SIMD128-NEXT: f32.const $push10=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.ge $push7=, $1, $pop9 +; NO-SIMD128-NEXT: f32.select $push8=, $pop10, $1, $pop7 +; NO-SIMD128-NEXT: f32.store 0($0), $pop8 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: min_ordered_v4f32: ; NO-SIMD128-FAST: .functype min_ordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.const $push17=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.ge $push1=, $1, $pop17 +; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.ge $push1=, $1, $pop15 ; NO-SIMD128-FAST-NEXT: f32.select $push2=, $pop0, $1, $pop1 ; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: f32.const $push16=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.ge $push3=, $2, $pop15 -; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop16, $2, $pop3 -; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 ; NO-SIMD128-FAST-NEXT: f32.const $push14=, 0x1.4p2 ; NO-SIMD128-FAST-NEXT: f32.const $push13=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.ge $push5=, $3, $pop13 -; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop14, $3, $pop5 -; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 -; 
NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: f32.ge $push3=, $2, $pop13 +; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop14, $2, $pop3 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 ; NO-SIMD128-FAST-NEXT: f32.const $push12=, 0x1.4p2 ; NO-SIMD128-FAST-NEXT: f32.const $push11=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.ge $push7=, $4, $pop11 -; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop12, $4, $pop7 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop10), $pop8 +; NO-SIMD128-FAST-NEXT: f32.ge $push5=, $3, $pop11 +; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop12, $3, $pop5 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: f32.const $push10=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.ge $push7=, $4, $pop9 +; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop10, $4, $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop8 ; NO-SIMD128-FAST-NEXT: return %cmps = fcmp ole <4 x float> <float 5., float 5., float 5., float 5.>, %x %a = select <4 x i1> %cmps, @@ -13379,54 +11623,50 @@ define <4 x float> @max_ordered_v4f32(<4 x float> %x) { ; NO-SIMD128: .functype max_ordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.const $push17=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.le $push1=, $3, $pop17 -; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $3, $pop1 -; NO-SIMD128-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-NEXT: f32.const $push16=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push15=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.le $push3=, $2, $pop15 -; NO-SIMD128-NEXT: f32.select $push4=, $pop16, $2, $pop3 -; NO-SIMD128-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-NEXT: f32.le $push1=, $4, $pop15 +; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $4, $pop1 +; NO-SIMD128-NEXT: f32.store 12($0), $pop2 ; NO-SIMD128-NEXT: f32.const $push14=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push13=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.le $push5=, $1, 
$pop13 -; NO-SIMD128-NEXT: f32.select $push6=, $pop14, $1, $pop5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push9=, 12 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: f32.le $push3=, $3, $pop13 +; NO-SIMD128-NEXT: f32.select $push4=, $pop14, $3, $pop3 +; NO-SIMD128-NEXT: f32.store 8($0), $pop4 ; NO-SIMD128-NEXT: f32.const $push12=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push11=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.le $push7=, $4, $pop11 -; NO-SIMD128-NEXT: f32.select $push8=, $pop12, $4, $pop7 -; NO-SIMD128-NEXT: f32.store 0($pop10), $pop8 +; NO-SIMD128-NEXT: f32.le $push5=, $2, $pop11 +; NO-SIMD128-NEXT: f32.select $push6=, $pop12, $2, $pop5 +; NO-SIMD128-NEXT: f32.store 4($0), $pop6 +; NO-SIMD128-NEXT: f32.const $push10=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.le $push7=, $1, $pop9 +; NO-SIMD128-NEXT: f32.select $push8=, $pop10, $1, $pop7 +; NO-SIMD128-NEXT: f32.store 0($0), $pop8 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_ordered_v4f32: ; NO-SIMD128-FAST: .functype max_ordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.const $push17=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.le $push1=, $1, $pop17 +; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.le $push1=, $1, $pop15 ; NO-SIMD128-FAST-NEXT: f32.select $push2=, $pop0, $1, $pop1 ; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: f32.const $push16=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.le $push3=, $2, $pop15 -; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop16, $2, $pop3 -; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 ; NO-SIMD128-FAST-NEXT: f32.const $push14=, 0x1.4p2 ; NO-SIMD128-FAST-NEXT: f32.const $push13=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.le $push5=, $3, $pop13 -; NO-SIMD128-FAST-NEXT: f32.select 
$push6=, $pop14, $3, $pop5 -; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: f32.le $push3=, $2, $pop13 +; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop14, $2, $pop3 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 ; NO-SIMD128-FAST-NEXT: f32.const $push12=, 0x1.4p2 ; NO-SIMD128-FAST-NEXT: f32.const $push11=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.le $push7=, $4, $pop11 -; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop12, $4, $pop7 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop10), $pop8 +; NO-SIMD128-FAST-NEXT: f32.le $push5=, $3, $pop11 +; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop12, $3, $pop5 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: f32.const $push10=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.le $push7=, $4, $pop9 +; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop10, $4, $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop8 ; NO-SIMD128-FAST-NEXT: return %cmps = fcmp oge <4 x float> <float 5., float 5., float 5., float 5.>, %x %a = select <4 x i1> %cmps, @@ -13451,16 +11691,14 @@ define <4 x float> @min_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: min_intrinsic_v4f32: ; NO-SIMD128: .functype min_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.min $push0=, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: f32.min $push1=, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: f32.min $push2=, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: f32.min $push3=, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: f32.min $push0=, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.min 
$push1=, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.min $push2=, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.min $push3=, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: min_intrinsic_v4f32: @@ -13472,10 +11710,8 @@ define <4 x float> @min_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: f32.min $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: f32.min $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: f32.min $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = call <4 x float> @llvm.minimum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a @@ -13552,16 +11788,14 @@ define <4 x float> @minnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: minnum_intrinsic_v4f32: ; NO-SIMD128: .functype minnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: call $push0=, fminf, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: call $push1=, fminf, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: call $push2=, fminf, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push3=, 12 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-NEXT: call $push5=, fminf, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-NEXT: call $push0=, fminf, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: call $push1=, fminf, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: call $push2=, fminf, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: call 
$push3=, fminf, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: minnum_intrinsic_v4f32: @@ -13573,10 +11807,8 @@ define <4 x float> @minnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: call $push2=, fminf, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: call $push5=, fminf, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a @@ -13598,16 +11830,14 @@ define <4 x float> @minnum_nsz_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: minnum_nsz_intrinsic_v4f32: ; NO-SIMD128: .functype minnum_nsz_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: call $push0=, fminf, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: call $push1=, fminf, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: call $push2=, fminf, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push3=, 12 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-NEXT: call $push5=, fminf, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-NEXT: call $push0=, fminf, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: call $push1=, fminf, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: call $push2=, fminf, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: call $push3=, fminf, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: 
minnum_nsz_intrinsic_v4f32: @@ -13619,10 +11849,8 @@ define <4 x float> @minnum_nsz_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: call $push2=, fminf, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: call $push5=, fminf, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = call nnan nsz <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a @@ -13647,19 +11875,17 @@ define <4 x float> @fminnumv432_non_zero_intrinsic(<4 x float> %x) { ; NO-SIMD128: .functype fminnumv432_non_zero_intrinsic (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0 -; NO-SIMD128-NEXT: call $push1=, fminf, $3, $pop0 -; NO-SIMD128-NEXT: f32.store 8($0), $pop1 -; NO-SIMD128-NEXT: f32.const $push9=, -0x1p0 -; NO-SIMD128-NEXT: call $push2=, fminf, $2, $pop9 -; NO-SIMD128-NEXT: f32.store 4($0), $pop2 -; NO-SIMD128-NEXT: f32.const $push8=, -0x1p0 -; NO-SIMD128-NEXT: call $push3=, fminf, $1, $pop8 -; NO-SIMD128-NEXT: f32.store 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: call $push1=, fminf, $4, $pop0 +; NO-SIMD128-NEXT: f32.store 12($0), $pop1 ; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0 -; NO-SIMD128-NEXT: call $push6=, fminf, $4, $pop7 -; NO-SIMD128-NEXT: f32.store 0($pop5), $pop6 +; NO-SIMD128-NEXT: call $push2=, fminf, $3, $pop7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-NEXT: call $push3=, fminf, $2, $pop6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop3 +; NO-SIMD128-NEXT: f32.const $push5=, -0x1p0 +; NO-SIMD128-NEXT: call $push4=, fminf, $1, 
$pop5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop4 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: fminnumv432_non_zero_intrinsic: @@ -13668,17 +11894,15 @@ define <4 x float> @fminnumv432_non_zero_intrinsic(<4 x float> %x) { ; NO-SIMD128-FAST-NEXT: f32.const $push0=, -0x1p0 ; NO-SIMD128-FAST-NEXT: call $push1=, fminf, $1, $pop0 ; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: f32.const $push9=, -0x1p0 -; NO-SIMD128-FAST-NEXT: call $push2=, fminf, $2, $pop9 +; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0 +; NO-SIMD128-FAST-NEXT: call $push2=, fminf, $2, $pop7 ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: f32.const $push8=, -0x1p0 -; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $3, $pop8 +; NO-SIMD128-FAST-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $3, $pop6 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0 -; NO-SIMD128-FAST-NEXT: call $push6=, fminf, $4, $pop7 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: f32.const $push5=, -0x1p0 +; NO-SIMD128-FAST-NEXT: call $push4=, fminf, $4, $pop5 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop4 ; NO-SIMD128-FAST-NEXT: return %a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float><float -1.0, float -1.0, float -1.0, float -1.0>) ret <4 x float> %a @@ -13755,19 +11979,17 @@ define <4 x float> @fminnumv432_one_zero_intrinsic(<4 x float> %x) { ; NO-SIMD128: .functype fminnumv432_one_zero_intrinsic (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0 -; NO-SIMD128-NEXT: call $push1=, fminf, $3, $pop0 -; NO-SIMD128-NEXT: f32.store 8($0), $pop1 -; NO-SIMD128-NEXT: f32.const $push2=, 0x0p0 -; NO-SIMD128-NEXT: call $push3=, fminf, $2, $pop2 -; NO-SIMD128-NEXT: f32.store 4($0), $pop3 -; NO-SIMD128-NEXT: 
f32.const $push9=, -0x1p0 -; NO-SIMD128-NEXT: call $push4=, fminf, $1, $pop9 -; NO-SIMD128-NEXT: f32.store 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push5=, 12 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: f32.const $push8=, -0x1p0 -; NO-SIMD128-NEXT: call $push7=, fminf, $4, $pop8 -; NO-SIMD128-NEXT: f32.store 0($pop6), $pop7 +; NO-SIMD128-NEXT: call $push1=, fminf, $4, $pop0 +; NO-SIMD128-NEXT: f32.store 12($0), $pop1 +; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0 +; NO-SIMD128-NEXT: call $push2=, fminf, $3, $pop7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-NEXT: f32.const $push3=, 0x0p0 +; NO-SIMD128-NEXT: call $push4=, fminf, $2, $pop3 +; NO-SIMD128-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-NEXT: call $push5=, fminf, $1, $pop6 +; NO-SIMD128-NEXT: f32.store 0($0), $pop5 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: fminnumv432_one_zero_intrinsic: @@ -13779,14 +12001,12 @@ define <4 x float> @fminnumv432_one_zero_intrinsic(<4 x float> %x) { ; NO-SIMD128-FAST-NEXT: f32.const $push2=, 0x0p0 ; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $2, $pop2 ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3 -; NO-SIMD128-FAST-NEXT: f32.const $push9=, -0x1p0 -; NO-SIMD128-FAST-NEXT: call $push4=, fminf, $3, $pop9 +; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0 +; NO-SIMD128-FAST-NEXT: call $push4=, fminf, $3, $pop7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push5=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-FAST-NEXT: f32.const $push8=, -0x1p0 -; NO-SIMD128-FAST-NEXT: call $push7=, fminf, $4, $pop8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop6), $pop7 +; NO-SIMD128-FAST-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-FAST-NEXT: call $push5=, fminf, $4, $pop6 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop5 ; NO-SIMD128-FAST-NEXT: return %a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float><float -1.0, float 0.0, 
float -1.0, float -1.0>) ret <4 x float> %a @@ -13809,16 +12029,14 @@ define <4 x float> @max_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: max_intrinsic_v4f32: ; NO-SIMD128: .functype max_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.max $push0=, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: f32.max $push1=, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: f32.max $push2=, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: f32.max $push3=, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: f32.max $push0=, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.max $push1=, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.max $push2=, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.max $push3=, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_intrinsic_v4f32: @@ -13830,10 +12048,8 @@ define <4 x float> @max_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: f32.max $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: f32.max $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: f32.max $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = call <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a @@ -13910,16 +12126,14 @@ define <4 x float> @maxnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: maxnum_intrinsic_v4f32: ; NO-SIMD128: 
.functype maxnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: call $push0=, fmaxf, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: call $push1=, fmaxf, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: call $push2=, fmaxf, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push3=, 12 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-NEXT: call $push5=, fmaxf, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-NEXT: call $push0=, fmaxf, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: call $push1=, fmaxf, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: call $push2=, fmaxf, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: call $push3=, fmaxf, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: maxnum_intrinsic_v4f32: @@ -13931,10 +12145,8 @@ define <4 x float> @maxnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: call $push2=, fmaxf, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: call $push5=, fmaxf, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: call $push3=, fmaxf, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a @@ -13956,16 +12168,14 @@ define <4 x float> @maxnum_nsz_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: maxnum_nsz_intrinsic_v4f32: ; NO-SIMD128: .functype maxnum_nsz_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: 
call $push0=, fmaxf, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: call $push1=, fmaxf, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: call $push2=, fmaxf, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push3=, 12 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-NEXT: call $push5=, fmaxf, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-NEXT: call $push0=, fmaxf, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: call $push1=, fmaxf, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: call $push2=, fmaxf, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: call $push3=, fmaxf, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: maxnum_nsz_intrinsic_v4f32: @@ -13977,10 +12187,8 @@ define <4 x float> @maxnum_nsz_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: call $push2=, fmaxf, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: call $push5=, fmaxf, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: call $push3=, fmaxf, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = call nnan nsz <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a @@ -14057,19 +12265,17 @@ define <4 x float> @maxnum_one_zero_intrinsic_v4f32(<4 x float> %x, <4 x float> ; NO-SIMD128: .functype maxnum_one_zero_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0 -; NO-SIMD128-NEXT: call $push1=, fmaxf, $3, $pop0 -; NO-SIMD128-NEXT: f32.store 8($0), $pop1 -; NO-SIMD128-NEXT: f32.const $push2=, 0x0p0 -; 
NO-SIMD128-NEXT: call $push3=, fmaxf, $2, $pop2 -; NO-SIMD128-NEXT: f32.store 4($0), $pop3 -; NO-SIMD128-NEXT: f32.const $push9=, -0x1p0 -; NO-SIMD128-NEXT: call $push4=, fmaxf, $1, $pop9 -; NO-SIMD128-NEXT: f32.store 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push5=, 12 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: f32.const $push8=, -0x1p0 -; NO-SIMD128-NEXT: call $push7=, fmaxf, $4, $pop8 -; NO-SIMD128-NEXT: f32.store 0($pop6), $pop7 +; NO-SIMD128-NEXT: call $push1=, fmaxf, $4, $pop0 +; NO-SIMD128-NEXT: f32.store 12($0), $pop1 +; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0 +; NO-SIMD128-NEXT: call $push2=, fmaxf, $3, $pop7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-NEXT: f32.const $push3=, 0x0p0 +; NO-SIMD128-NEXT: call $push4=, fmaxf, $2, $pop3 +; NO-SIMD128-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-NEXT: call $push5=, fmaxf, $1, $pop6 +; NO-SIMD128-NEXT: f32.store 0($0), $pop5 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: maxnum_one_zero_intrinsic_v4f32: @@ -14081,14 +12287,12 @@ define <4 x float> @maxnum_one_zero_intrinsic_v4f32(<4 x float> %x, <4 x float> ; NO-SIMD128-FAST-NEXT: f32.const $push2=, 0x0p0 ; NO-SIMD128-FAST-NEXT: call $push3=, fmaxf, $2, $pop2 ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3 -; NO-SIMD128-FAST-NEXT: f32.const $push9=, -0x1p0 -; NO-SIMD128-FAST-NEXT: call $push4=, fmaxf, $3, $pop9 +; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0 +; NO-SIMD128-FAST-NEXT: call $push4=, fmaxf, $3, $pop7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push5=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-FAST-NEXT: f32.const $push8=, -0x1p0 -; NO-SIMD128-FAST-NEXT: call $push7=, fmaxf, $4, $pop8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop6), $pop7 +; NO-SIMD128-FAST-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-FAST-NEXT: call $push5=, fmaxf, $4, $pop6 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop5 ; 
NO-SIMD128-FAST-NEXT: return %a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float><float -1.0, float 0.0, float -1.0, float -1.0>) ret <4 x float> %a @@ -14113,19 +12317,17 @@ define <4 x float> @maxnum_non_zero_intrinsic_v4f32(<4 x float> %x, <4 x float> ; NO-SIMD128: .functype maxnum_non_zero_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0 -; NO-SIMD128-NEXT: call $push1=, fmaxf, $3, $pop0 -; NO-SIMD128-NEXT: f32.store 8($0), $pop1 -; NO-SIMD128-NEXT: f32.const $push2=, 0x1p0 -; NO-SIMD128-NEXT: call $push3=, fmaxf, $2, $pop2 -; NO-SIMD128-NEXT: f32.store 4($0), $pop3 -; NO-SIMD128-NEXT: f32.const $push9=, -0x1p0 -; NO-SIMD128-NEXT: call $push4=, fmaxf, $1, $pop9 -; NO-SIMD128-NEXT: f32.store 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push5=, 12 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: f32.const $push8=, -0x1p0 -; NO-SIMD128-NEXT: call $push7=, fmaxf, $4, $pop8 -; NO-SIMD128-NEXT: f32.store 0($pop6), $pop7 +; NO-SIMD128-NEXT: call $push1=, fmaxf, $4, $pop0 +; NO-SIMD128-NEXT: f32.store 12($0), $pop1 +; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0 +; NO-SIMD128-NEXT: call $push2=, fmaxf, $3, $pop7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-NEXT: f32.const $push3=, 0x1p0 +; NO-SIMD128-NEXT: call $push4=, fmaxf, $2, $pop3 +; NO-SIMD128-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-NEXT: call $push5=, fmaxf, $1, $pop6 +; NO-SIMD128-NEXT: f32.store 0($0), $pop5 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: maxnum_non_zero_intrinsic_v4f32: @@ -14137,14 +12339,12 @@ define <4 x float> @maxnum_non_zero_intrinsic_v4f32(<4 x float> %x, <4 x float> ; NO-SIMD128-FAST-NEXT: f32.const $push2=, 0x1p0 ; NO-SIMD128-FAST-NEXT: call $push3=, fmaxf, $2, $pop2 ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3 -; NO-SIMD128-FAST-NEXT: f32.const $push9=, -0x1p0 -; NO-SIMD128-FAST-NEXT: call 
$push4=, fmaxf, $3, $pop9 +; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0 +; NO-SIMD128-FAST-NEXT: call $push4=, fmaxf, $3, $pop7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push5=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-FAST-NEXT: f32.const $push8=, -0x1p0 -; NO-SIMD128-FAST-NEXT: call $push7=, fmaxf, $4, $pop8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop6), $pop7 +; NO-SIMD128-FAST-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-FAST-NEXT: call $push5=, fmaxf, $4, $pop6 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop5 ; NO-SIMD128-FAST-NEXT: return %a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float><float -1.0, float 1.0, float -1.0, float -1.0>) ret <4 x float> %a @@ -14240,20 +12440,18 @@ define <4 x float> @pmin_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: pmin_v4f32: ; NO-SIMD128: .functype pmin_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.lt $push0=, $7, $3 -; NO-SIMD128-NEXT: f32.select $push1=, $7, $3, $pop0 -; NO-SIMD128-NEXT: f32.store 8($0), $pop1 -; NO-SIMD128-NEXT: f32.lt $push2=, $6, $2 -; NO-SIMD128-NEXT: f32.select $push3=, $6, $2, $pop2 -; NO-SIMD128-NEXT: f32.store 4($0), $pop3 -; NO-SIMD128-NEXT: f32.lt $push4=, $5, $1 -; NO-SIMD128-NEXT: f32.select $push5=, $5, $1, $pop4 -; NO-SIMD128-NEXT: f32.store 0($0), $pop5 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: f32.lt $push6=, $8, $4 -; NO-SIMD128-NEXT: f32.select $push7=, $8, $4, $pop6 -; NO-SIMD128-NEXT: f32.store 0($pop9), $pop7 +; NO-SIMD128-NEXT: f32.lt $push0=, $8, $4 +; NO-SIMD128-NEXT: f32.select $push1=, $8, $4, $pop0 +; NO-SIMD128-NEXT: f32.store 12($0), $pop1 +; NO-SIMD128-NEXT: f32.lt $push2=, $7, $3 +; NO-SIMD128-NEXT: f32.select $push3=, $7, $3, $pop2 +; NO-SIMD128-NEXT: f32.store 8($0), $pop3 +; NO-SIMD128-NEXT: f32.lt $push4=, $6, $2 +; NO-SIMD128-NEXT: 
f32.select $push5=, $6, $2, $pop4 +; NO-SIMD128-NEXT: f32.store 4($0), $pop5 +; NO-SIMD128-NEXT: f32.lt $push6=, $5, $1 +; NO-SIMD128-NEXT: f32.select $push7=, $5, $1, $pop6 +; NO-SIMD128-NEXT: f32.store 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: pmin_v4f32: @@ -14268,11 +12466,9 @@ define <4 x float> @pmin_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.lt $push4=, $7, $3 ; NO-SIMD128-FAST-NEXT: f32.select $push5=, $7, $3, $pop4 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 ; NO-SIMD128-FAST-NEXT: f32.lt $push6=, $8, $4 ; NO-SIMD128-FAST-NEXT: f32.select $push7=, $8, $4, $pop6 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop9), $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %c = fcmp olt <4 x float> %y, %x %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x @@ -14295,28 +12491,26 @@ define <4 x i32> @pmin_int_v4f32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: pmin_int_v4f32: ; NO-SIMD128: .functype pmin_int_v4f32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 ; NO-SIMD128-NEXT: f32.reinterpret_i32 $push1=, $8 ; NO-SIMD128-NEXT: f32.reinterpret_i32 $push0=, $4 ; NO-SIMD128-NEXT: f32.lt $push2=, $pop1, $pop0 ; NO-SIMD128-NEXT: i32.select $push3=, $8, $4, $pop2 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push7=, $7 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push6=, $3 -; NO-SIMD128-NEXT: f32.lt $push8=, $pop7, $pop6 -; NO-SIMD128-NEXT: i32.select $push9=, $7, $3, $pop8 -; NO-SIMD128-NEXT: i32.store 8($0), $pop9 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push11=, $6 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push10=, $2 -; NO-SIMD128-NEXT: f32.lt $push12=, $pop11, $pop10 -; NO-SIMD128-NEXT: i32.select $push13=, $6, $2, $pop12 -; 
NO-SIMD128-NEXT: i32.store 4($0), $pop13 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push15=, $5 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push14=, $1 -; NO-SIMD128-NEXT: f32.lt $push16=, $pop15, $pop14 -; NO-SIMD128-NEXT: i32.select $push17=, $5, $1, $pop16 -; NO-SIMD128-NEXT: i32.store 0($0), $pop17 +; NO-SIMD128-NEXT: i32.store 12($0), $pop3 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push5=, $7 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push4=, $3 +; NO-SIMD128-NEXT: f32.lt $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.select $push7=, $7, $3, $pop6 +; NO-SIMD128-NEXT: i32.store 8($0), $pop7 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push9=, $6 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push8=, $2 +; NO-SIMD128-NEXT: f32.lt $push10=, $pop9, $pop8 +; NO-SIMD128-NEXT: i32.select $push11=, $6, $2, $pop10 +; NO-SIMD128-NEXT: i32.store 4($0), $pop11 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push13=, $5 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push12=, $1 +; NO-SIMD128-NEXT: f32.lt $push14=, $pop13, $pop12 +; NO-SIMD128-NEXT: i32.select $push15=, $5, $1, $pop14 +; NO-SIMD128-NEXT: i32.store 0($0), $pop15 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: pmin_int_v4f32: @@ -14337,13 +12531,11 @@ define <4 x i32> @pmin_int_v4f32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: f32.lt $push10=, $pop9, $pop8 ; NO-SIMD128-FAST-NEXT: i32.select $push11=, $7, $3, $pop10 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push16=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16 ; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push13=, $8 ; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push12=, $4 ; NO-SIMD128-FAST-NEXT: f32.lt $push14=, $pop13, $pop12 ; NO-SIMD128-FAST-NEXT: i32.select $push15=, $8, $4, $pop14 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop17), $pop15 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop15 ; NO-SIMD128-FAST-NEXT: return %fx = bitcast <4 x i32> %x to <4 x float> %fy = bitcast <4 x i32> %y to <4 x float> @@ -14368,20 +12560,18 
@@ define <4 x float> @pmax_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: pmax_v4f32: ; NO-SIMD128: .functype pmax_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.lt $push0=, $3, $7 -; NO-SIMD128-NEXT: f32.select $push1=, $7, $3, $pop0 -; NO-SIMD128-NEXT: f32.store 8($0), $pop1 -; NO-SIMD128-NEXT: f32.lt $push2=, $2, $6 -; NO-SIMD128-NEXT: f32.select $push3=, $6, $2, $pop2 -; NO-SIMD128-NEXT: f32.store 4($0), $pop3 -; NO-SIMD128-NEXT: f32.lt $push4=, $1, $5 -; NO-SIMD128-NEXT: f32.select $push5=, $5, $1, $pop4 -; NO-SIMD128-NEXT: f32.store 0($0), $pop5 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: f32.lt $push6=, $4, $8 -; NO-SIMD128-NEXT: f32.select $push7=, $8, $4, $pop6 -; NO-SIMD128-NEXT: f32.store 0($pop9), $pop7 +; NO-SIMD128-NEXT: f32.lt $push0=, $4, $8 +; NO-SIMD128-NEXT: f32.select $push1=, $8, $4, $pop0 +; NO-SIMD128-NEXT: f32.store 12($0), $pop1 +; NO-SIMD128-NEXT: f32.lt $push2=, $3, $7 +; NO-SIMD128-NEXT: f32.select $push3=, $7, $3, $pop2 +; NO-SIMD128-NEXT: f32.store 8($0), $pop3 +; NO-SIMD128-NEXT: f32.lt $push4=, $2, $6 +; NO-SIMD128-NEXT: f32.select $push5=, $6, $2, $pop4 +; NO-SIMD128-NEXT: f32.store 4($0), $pop5 +; NO-SIMD128-NEXT: f32.lt $push6=, $1, $5 +; NO-SIMD128-NEXT: f32.select $push7=, $5, $1, $pop6 +; NO-SIMD128-NEXT: f32.store 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: pmax_v4f32: @@ -14396,11 +12586,9 @@ define <4 x float> @pmax_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.lt $push4=, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.select $push5=, $7, $3, $pop4 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 ; NO-SIMD128-FAST-NEXT: f32.lt $push6=, $4, $8 ; NO-SIMD128-FAST-NEXT: f32.select $push7=, $8, $4, $pop6 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop9), $pop7 +; 
NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %c = fcmp olt <4 x float> %x, %y %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x @@ -14423,28 +12611,26 @@ define <4 x i32> @pmax_int_v4f32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: pmax_int_v4f32: ; NO-SIMD128: .functype pmax_int_v4f32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 ; NO-SIMD128-NEXT: f32.reinterpret_i32 $push1=, $4 ; NO-SIMD128-NEXT: f32.reinterpret_i32 $push0=, $8 ; NO-SIMD128-NEXT: f32.lt $push2=, $pop1, $pop0 ; NO-SIMD128-NEXT: i32.select $push3=, $8, $4, $pop2 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push7=, $3 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push6=, $7 -; NO-SIMD128-NEXT: f32.lt $push8=, $pop7, $pop6 -; NO-SIMD128-NEXT: i32.select $push9=, $7, $3, $pop8 -; NO-SIMD128-NEXT: i32.store 8($0), $pop9 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push11=, $2 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push10=, $6 -; NO-SIMD128-NEXT: f32.lt $push12=, $pop11, $pop10 -; NO-SIMD128-NEXT: i32.select $push13=, $6, $2, $pop12 -; NO-SIMD128-NEXT: i32.store 4($0), $pop13 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push15=, $1 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push14=, $5 -; NO-SIMD128-NEXT: f32.lt $push16=, $pop15, $pop14 -; NO-SIMD128-NEXT: i32.select $push17=, $5, $1, $pop16 -; NO-SIMD128-NEXT: i32.store 0($0), $pop17 +; NO-SIMD128-NEXT: i32.store 12($0), $pop3 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push5=, $3 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push4=, $7 +; NO-SIMD128-NEXT: f32.lt $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.select $push7=, $7, $3, $pop6 +; NO-SIMD128-NEXT: i32.store 8($0), $pop7 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push9=, $2 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push8=, $6 +; NO-SIMD128-NEXT: f32.lt $push10=, $pop9, $pop8 +; NO-SIMD128-NEXT: i32.select 
$push11=, $6, $2, $pop10 +; NO-SIMD128-NEXT: i32.store 4($0), $pop11 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push13=, $1 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push12=, $5 +; NO-SIMD128-NEXT: f32.lt $push14=, $pop13, $pop12 +; NO-SIMD128-NEXT: i32.select $push15=, $5, $1, $pop14 +; NO-SIMD128-NEXT: i32.store 0($0), $pop15 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: pmax_int_v4f32: @@ -14465,13 +12651,11 @@ define <4 x i32> @pmax_int_v4f32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: f32.lt $push10=, $pop9, $pop8 ; NO-SIMD128-FAST-NEXT: i32.select $push11=, $7, $3, $pop10 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push16=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16 ; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push13=, $4 ; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push12=, $8 ; NO-SIMD128-FAST-NEXT: f32.lt $push14=, $pop13, $pop12 ; NO-SIMD128-FAST-NEXT: i32.select $push15=, $8, $4, $pop14 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop17), $pop15 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop15 ; NO-SIMD128-FAST-NEXT: return %fx = bitcast <4 x i32> %x to <4 x float> %fy = bitcast <4 x i32> %y to <4 x float> @@ -14496,16 +12680,14 @@ define <4 x float> @add_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: add_v4f32: ; NO-SIMD128: .functype add_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.add $push0=, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: f32.add $push1=, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: f32.add $push2=, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: f32.add $push3=, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: f32.add $push0=, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.add $push1=, 
$3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.add $push2=, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.add $push3=, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: add_v4f32: @@ -14517,10 +12699,8 @@ define <4 x float> @add_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: f32.add $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: f32.add $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: f32.add $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = fadd <4 x float> %x, %y ret <4 x float> %a @@ -14542,16 +12722,14 @@ define <4 x float> @sub_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: sub_v4f32: ; NO-SIMD128: .functype sub_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.sub $push0=, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: f32.sub $push1=, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: f32.sub $push2=, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: f32.sub $push3=, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: f32.sub $push0=, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.sub $push1=, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.sub $push2=, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.sub $push3=, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: sub_v4f32: @@ -14563,10 
+12741,8 @@ define <4 x float> @sub_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: f32.sub $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: f32.sub $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: f32.sub $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = fsub <4 x float> %x, %y ret <4 x float> %a @@ -14588,16 +12764,14 @@ define <4 x float> @div_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: div_v4f32: ; NO-SIMD128: .functype div_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.div $push0=, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: f32.div $push1=, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: f32.div $push2=, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: f32.div $push3=, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: f32.div $push0=, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.div $push1=, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.div $push2=, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.div $push3=, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: div_v4f32: @@ -14609,10 +12783,8 @@ define <4 x float> @div_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: f32.div $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add 
$push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: f32.div $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: f32.div $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = fdiv <4 x float> %x, %y ret <4 x float> %a @@ -14634,16 +12806,14 @@ define <4 x float> @mul_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: mul_v4f32: ; NO-SIMD128: .functype mul_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.mul $push0=, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: f32.mul $push1=, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: f32.mul $push2=, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: f32.mul $push3=, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: f32.mul $push0=, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.mul $push1=, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.mul $push2=, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.mul $push3=, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: mul_v4f32: @@ -14655,10 +12825,8 @@ define <4 x float> @mul_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: f32.mul $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: f32.mul $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: f32.mul $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = fmul <4 x float> %x, %y ret <4 x float> %a @@ 
-14681,16 +12849,14 @@ define <4 x float> @sqrt_v4f32(<4 x float> %x) { ; NO-SIMD128-LABEL: sqrt_v4f32: ; NO-SIMD128: .functype sqrt_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.sqrt $push0=, $3 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: f32.sqrt $push1=, $2 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: f32.sqrt $push2=, $1 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push3=, 12 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-NEXT: f32.sqrt $push5=, $4 -; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-NEXT: f32.sqrt $push0=, $4 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.sqrt $push1=, $3 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.sqrt $push2=, $2 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.sqrt $push3=, $1 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: sqrt_v4f32: @@ -14702,10 +12868,8 @@ define <4 x float> @sqrt_v4f32(<4 x float> %x) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: f32.sqrt $push2=, $3 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: f32.sqrt $push5=, $4 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: f32.sqrt $push3=, $4 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) ret <4 x float> %a diff --git a/llvm/test/CodeGen/WebAssembly/simd.ll b/llvm/test/CodeGen/WebAssembly/simd.ll index d2a38de..5ec9f6a 100644 --- a/llvm/test/CodeGen/WebAssembly/simd.ll +++ b/llvm/test/CodeGen/WebAssembly/simd.ll @@ -38,44 +38,22 @@ define <16 x i8> @splat_v16i8(i8 %x) { ; NO-SIMD128-LABEL: splat_v16i8: ; NO-SIMD128: .functype splat_v16i8 (i32, i32) -> () ; 
NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store8 15($0), $1 +; NO-SIMD128-NEXT: i32.store8 14($0), $1 +; NO-SIMD128-NEXT: i32.store8 13($0), $1 +; NO-SIMD128-NEXT: i32.store8 12($0), $1 +; NO-SIMD128-NEXT: i32.store8 11($0), $1 +; NO-SIMD128-NEXT: i32.store8 10($0), $1 +; NO-SIMD128-NEXT: i32.store8 9($0), $1 ; NO-SIMD128-NEXT: i32.store8 8($0), $1 +; NO-SIMD128-NEXT: i32.store8 7($0), $1 +; NO-SIMD128-NEXT: i32.store8 6($0), $1 +; NO-SIMD128-NEXT: i32.store8 5($0), $1 ; NO-SIMD128-NEXT: i32.store8 4($0), $1 +; NO-SIMD128-NEXT: i32.store8 3($0), $1 ; NO-SIMD128-NEXT: i32.store8 2($0), $1 ; NO-SIMD128-NEXT: i32.store8 1($0), $1 ; NO-SIMD128-NEXT: i32.store8 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 15 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store8 0($pop1), $1 -; NO-SIMD128-NEXT: i32.const $push2=, 14 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store8 0($pop3), $1 -; NO-SIMD128-NEXT: i32.const $push4=, 13 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store8 0($pop5), $1 -; NO-SIMD128-NEXT: i32.const $push6=, 12 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $1 -; NO-SIMD128-NEXT: i32.const $push8=, 11 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.store8 0($pop9), $1 -; NO-SIMD128-NEXT: i32.const $push10=, 10 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $1 -; NO-SIMD128-NEXT: i32.const $push12=, 9 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $1 -; NO-SIMD128-NEXT: i32.const $push14=, 7 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.store8 0($pop15), $1 -; NO-SIMD128-NEXT: i32.const $push16=, 6 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $1 -; NO-SIMD128-NEXT: i32.const $push18=, 5 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; 
NO-SIMD128-NEXT: i32.store8 0($pop19), $1 -; NO-SIMD128-NEXT: i32.const $push20=, 3 -; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-NEXT: i32.store8 0($pop21), $1 ; NO-SIMD128-NEXT: return %v = insertelement <16 x i8> undef, i8 %x, i32 0 %res = shufflevector <16 x i8> %v, <16 x i8> undef, @@ -356,44 +334,22 @@ define <16 x i8> @replace_v16i8(<16 x i8> %v, i8 %x) { ; NO-SIMD128-LABEL: replace_v16i8: ; NO-SIMD128: .functype replace_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store8 15($0), $16 +; NO-SIMD128-NEXT: i32.store8 14($0), $15 +; NO-SIMD128-NEXT: i32.store8 13($0), $14 +; NO-SIMD128-NEXT: i32.store8 12($0), $13 +; NO-SIMD128-NEXT: i32.store8 11($0), $17 +; NO-SIMD128-NEXT: i32.store8 10($0), $11 +; NO-SIMD128-NEXT: i32.store8 9($0), $10 ; NO-SIMD128-NEXT: i32.store8 8($0), $9 +; NO-SIMD128-NEXT: i32.store8 7($0), $8 +; NO-SIMD128-NEXT: i32.store8 6($0), $7 +; NO-SIMD128-NEXT: i32.store8 5($0), $6 ; NO-SIMD128-NEXT: i32.store8 4($0), $5 +; NO-SIMD128-NEXT: i32.store8 3($0), $4 ; NO-SIMD128-NEXT: i32.store8 2($0), $3 ; NO-SIMD128-NEXT: i32.store8 1($0), $2 ; NO-SIMD128-NEXT: i32.store8 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 15 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store8 0($pop1), $16 -; NO-SIMD128-NEXT: i32.const $push2=, 14 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store8 0($pop3), $15 -; NO-SIMD128-NEXT: i32.const $push4=, 13 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store8 0($pop5), $14 -; NO-SIMD128-NEXT: i32.const $push6=, 12 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $13 -; NO-SIMD128-NEXT: i32.const $push8=, 11 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.store8 0($pop9), $17 -; NO-SIMD128-NEXT: i32.const $push10=, 10 -; NO-SIMD128-NEXT: i32.add $push11=, $0, 
$pop10 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $11 -; NO-SIMD128-NEXT: i32.const $push12=, 9 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $10 -; NO-SIMD128-NEXT: i32.const $push14=, 7 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.store8 0($pop15), $8 -; NO-SIMD128-NEXT: i32.const $push16=, 6 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $7 -; NO-SIMD128-NEXT: i32.const $push18=, 5 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $6 -; NO-SIMD128-NEXT: i32.const $push20=, 3 -; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-NEXT: i32.store8 0($pop21), $4 ; NO-SIMD128-NEXT: return %res = insertelement <16 x i8> %v, i8 %x, i32 11 ret <16 x i8> %res @@ -461,44 +417,22 @@ define <16 x i8> @replace_zero_v16i8(<16 x i8> %v, i8 %x) { ; NO-SIMD128-LABEL: replace_zero_v16i8: ; NO-SIMD128: .functype replace_zero_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store8 15($0), $16 +; NO-SIMD128-NEXT: i32.store8 14($0), $15 +; NO-SIMD128-NEXT: i32.store8 13($0), $14 +; NO-SIMD128-NEXT: i32.store8 12($0), $13 +; NO-SIMD128-NEXT: i32.store8 11($0), $12 +; NO-SIMD128-NEXT: i32.store8 10($0), $11 +; NO-SIMD128-NEXT: i32.store8 9($0), $10 ; NO-SIMD128-NEXT: i32.store8 8($0), $9 +; NO-SIMD128-NEXT: i32.store8 7($0), $8 +; NO-SIMD128-NEXT: i32.store8 6($0), $7 +; NO-SIMD128-NEXT: i32.store8 5($0), $6 ; NO-SIMD128-NEXT: i32.store8 4($0), $5 +; NO-SIMD128-NEXT: i32.store8 3($0), $4 ; NO-SIMD128-NEXT: i32.store8 2($0), $3 ; NO-SIMD128-NEXT: i32.store8 1($0), $2 ; NO-SIMD128-NEXT: i32.store8 0($0), $17 -; NO-SIMD128-NEXT: i32.const $push0=, 15 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store8 0($pop1), $16 -; NO-SIMD128-NEXT: i32.const $push2=, 14 -; NO-SIMD128-NEXT: i32.add $push3=, $0, 
$pop2 -; NO-SIMD128-NEXT: i32.store8 0($pop3), $15 -; NO-SIMD128-NEXT: i32.const $push4=, 13 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store8 0($pop5), $14 -; NO-SIMD128-NEXT: i32.const $push6=, 12 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $13 -; NO-SIMD128-NEXT: i32.const $push8=, 11 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.store8 0($pop9), $12 -; NO-SIMD128-NEXT: i32.const $push10=, 10 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $11 -; NO-SIMD128-NEXT: i32.const $push12=, 9 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $10 -; NO-SIMD128-NEXT: i32.const $push14=, 7 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.store8 0($pop15), $8 -; NO-SIMD128-NEXT: i32.const $push16=, 6 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $7 -; NO-SIMD128-NEXT: i32.const $push18=, 5 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $6 -; NO-SIMD128-NEXT: i32.const $push20=, 3 -; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-NEXT: i32.store8 0($pop21), $4 ; NO-SIMD128-NEXT: return %res = insertelement <16 x i8> %v, i8 %x, i32 0 ret <16 x i8> %res @@ -514,44 +448,22 @@ define <16 x i8> @shuffle_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: shuffle_v16i8: ; NO-SIMD128: .functype shuffle_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store8 15($0), $32 +; NO-SIMD128-NEXT: i32.store8 14($0), $15 +; NO-SIMD128-NEXT: i32.store8 13($0), $30 +; NO-SIMD128-NEXT: i32.store8 12($0), $13 +; NO-SIMD128-NEXT: i32.store8 11($0), $28 +; NO-SIMD128-NEXT: i32.store8 10($0), $11 +; NO-SIMD128-NEXT: 
i32.store8 9($0), $26 ; NO-SIMD128-NEXT: i32.store8 8($0), $9 +; NO-SIMD128-NEXT: i32.store8 7($0), $24 +; NO-SIMD128-NEXT: i32.store8 6($0), $7 +; NO-SIMD128-NEXT: i32.store8 5($0), $22 ; NO-SIMD128-NEXT: i32.store8 4($0), $5 +; NO-SIMD128-NEXT: i32.store8 3($0), $20 ; NO-SIMD128-NEXT: i32.store8 2($0), $3 ; NO-SIMD128-NEXT: i32.store8 1($0), $18 ; NO-SIMD128-NEXT: i32.store8 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 15 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store8 0($pop1), $32 -; NO-SIMD128-NEXT: i32.const $push2=, 14 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store8 0($pop3), $15 -; NO-SIMD128-NEXT: i32.const $push4=, 13 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store8 0($pop5), $30 -; NO-SIMD128-NEXT: i32.const $push6=, 12 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $13 -; NO-SIMD128-NEXT: i32.const $push8=, 11 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.store8 0($pop9), $28 -; NO-SIMD128-NEXT: i32.const $push10=, 10 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $11 -; NO-SIMD128-NEXT: i32.const $push12=, 9 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $26 -; NO-SIMD128-NEXT: i32.const $push14=, 7 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.store8 0($pop15), $24 -; NO-SIMD128-NEXT: i32.const $push16=, 6 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $7 -; NO-SIMD128-NEXT: i32.const $push18=, 5 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $22 -; NO-SIMD128-NEXT: i32.const $push20=, 3 -; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-NEXT: i32.store8 0($pop21), $20 ; NO-SIMD128-NEXT: return %res = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 17, i32 2, i32 19, 
i32 4, i32 21, i32 6, i32 23, @@ -569,44 +481,22 @@ define <16 x i8> @shuffle_undef_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: shuffle_undef_v16i8: ; NO-SIMD128: .functype shuffle_undef_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store8 15($0), $2 +; NO-SIMD128-NEXT: i32.store8 14($0), $2 +; NO-SIMD128-NEXT: i32.store8 13($0), $2 +; NO-SIMD128-NEXT: i32.store8 12($0), $2 +; NO-SIMD128-NEXT: i32.store8 11($0), $2 +; NO-SIMD128-NEXT: i32.store8 10($0), $2 +; NO-SIMD128-NEXT: i32.store8 9($0), $2 ; NO-SIMD128-NEXT: i32.store8 8($0), $2 +; NO-SIMD128-NEXT: i32.store8 7($0), $2 +; NO-SIMD128-NEXT: i32.store8 6($0), $2 +; NO-SIMD128-NEXT: i32.store8 5($0), $2 ; NO-SIMD128-NEXT: i32.store8 4($0), $2 +; NO-SIMD128-NEXT: i32.store8 3($0), $2 ; NO-SIMD128-NEXT: i32.store8 2($0), $2 ; NO-SIMD128-NEXT: i32.store8 1($0), $2 ; NO-SIMD128-NEXT: i32.store8 0($0), $2 -; NO-SIMD128-NEXT: i32.const $push0=, 15 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store8 0($pop1), $2 -; NO-SIMD128-NEXT: i32.const $push2=, 14 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store8 0($pop3), $2 -; NO-SIMD128-NEXT: i32.const $push4=, 13 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store8 0($pop5), $2 -; NO-SIMD128-NEXT: i32.const $push6=, 12 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $2 -; NO-SIMD128-NEXT: i32.const $push8=, 11 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.store8 0($pop9), $2 -; NO-SIMD128-NEXT: i32.const $push10=, 10 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $2 -; NO-SIMD128-NEXT: i32.const $push12=, 9 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $2 
-; NO-SIMD128-NEXT: i32.const $push14=, 7 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.store8 0($pop15), $2 -; NO-SIMD128-NEXT: i32.const $push16=, 6 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $2 -; NO-SIMD128-NEXT: i32.const $push18=, 5 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $2 -; NO-SIMD128-NEXT: i32.const $push20=, 3 -; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-NEXT: i32.store8 0($pop21), $2 ; NO-SIMD128-NEXT: return %res = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, @@ -641,44 +531,22 @@ define <16 x i8> @build_v16i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3, ; NO-SIMD128-LABEL: build_v16i8: ; NO-SIMD128: .functype build_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store8 15($0), $16 +; NO-SIMD128-NEXT: i32.store8 14($0), $15 +; NO-SIMD128-NEXT: i32.store8 13($0), $14 +; NO-SIMD128-NEXT: i32.store8 12($0), $13 +; NO-SIMD128-NEXT: i32.store8 11($0), $12 +; NO-SIMD128-NEXT: i32.store8 10($0), $11 +; NO-SIMD128-NEXT: i32.store8 9($0), $10 ; NO-SIMD128-NEXT: i32.store8 8($0), $9 +; NO-SIMD128-NEXT: i32.store8 7($0), $8 +; NO-SIMD128-NEXT: i32.store8 6($0), $7 +; NO-SIMD128-NEXT: i32.store8 5($0), $6 ; NO-SIMD128-NEXT: i32.store8 4($0), $5 +; NO-SIMD128-NEXT: i32.store8 3($0), $4 ; NO-SIMD128-NEXT: i32.store8 2($0), $3 ; NO-SIMD128-NEXT: i32.store8 1($0), $2 ; NO-SIMD128-NEXT: i32.store8 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 15 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store8 0($pop1), $16 -; NO-SIMD128-NEXT: i32.const $push2=, 14 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store8 0($pop3), $15 -; NO-SIMD128-NEXT: i32.const $push4=, 13 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store8 
0($pop5), $14 -; NO-SIMD128-NEXT: i32.const $push6=, 12 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $13 -; NO-SIMD128-NEXT: i32.const $push8=, 11 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.store8 0($pop9), $12 -; NO-SIMD128-NEXT: i32.const $push10=, 10 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $11 -; NO-SIMD128-NEXT: i32.const $push12=, 9 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $10 -; NO-SIMD128-NEXT: i32.const $push14=, 7 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.store8 0($pop15), $8 -; NO-SIMD128-NEXT: i32.const $push16=, 6 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $7 -; NO-SIMD128-NEXT: i32.const $push18=, 5 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $6 -; NO-SIMD128-NEXT: i32.const $push20=, 3 -; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-NEXT: i32.store8 0($pop21), $4 ; NO-SIMD128-NEXT: return i8 %x4, i8 %x5, i8 %x6, i8 %x7, i8 %x8, i8 %x9, i8 %x10, i8 %x11, @@ -734,22 +602,14 @@ define <8 x i16> @splat_v8i16(i16 %x) { ; NO-SIMD128-LABEL: splat_v8i16: ; NO-SIMD128: .functype splat_v8i16 (i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store16 14($0), $1 +; NO-SIMD128-NEXT: i32.store16 12($0), $1 +; NO-SIMD128-NEXT: i32.store16 10($0), $1 ; NO-SIMD128-NEXT: i32.store16 8($0), $1 +; NO-SIMD128-NEXT: i32.store16 6($0), $1 ; NO-SIMD128-NEXT: i32.store16 4($0), $1 ; NO-SIMD128-NEXT: i32.store16 2($0), $1 ; NO-SIMD128-NEXT: i32.store16 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 14 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store16 0($pop1), $1 -; NO-SIMD128-NEXT: i32.const $push2=, 12 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store16 0($pop3), $1 -; NO-SIMD128-NEXT: 
i32.const $push4=, 10 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store16 0($pop5), $1 -; NO-SIMD128-NEXT: i32.const $push6=, 6 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store16 0($pop7), $1 ; NO-SIMD128-NEXT: return %v = insertelement <8 x i16> undef, i16 %x, i32 0 %res = shufflevector <8 x i16> %v, <8 x i16> undef, @@ -1016,22 +876,14 @@ define <8 x i16> @replace_v8i16(<8 x i16> %v, i16 %x) { ; NO-SIMD128-LABEL: replace_v8i16: ; NO-SIMD128: .functype replace_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store16 14($0), $9 +; NO-SIMD128-NEXT: i32.store16 12($0), $7 +; NO-SIMD128-NEXT: i32.store16 10($0), $6 ; NO-SIMD128-NEXT: i32.store16 8($0), $5 +; NO-SIMD128-NEXT: i32.store16 6($0), $4 ; NO-SIMD128-NEXT: i32.store16 4($0), $3 ; NO-SIMD128-NEXT: i32.store16 2($0), $2 ; NO-SIMD128-NEXT: i32.store16 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 14 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store16 0($pop1), $9 -; NO-SIMD128-NEXT: i32.const $push2=, 12 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store16 0($pop3), $7 -; NO-SIMD128-NEXT: i32.const $push4=, 10 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store16 0($pop5), $6 -; NO-SIMD128-NEXT: i32.const $push6=, 6 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store16 0($pop7), $4 ; NO-SIMD128-NEXT: return %res = insertelement <8 x i16> %v, i16 %x, i32 7 ret <8 x i16> %res @@ -1095,22 +947,14 @@ define <8 x i16> @replace_zero_v8i16(<8 x i16> %v, i16 %x) { ; NO-SIMD128-LABEL: replace_zero_v8i16: ; NO-SIMD128: .functype replace_zero_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store16 14($0), $8 +; NO-SIMD128-NEXT: i32.store16 12($0), $7 +; NO-SIMD128-NEXT: i32.store16 10($0), $6 ; NO-SIMD128-NEXT: i32.store16 8($0), $5 
+; NO-SIMD128-NEXT: i32.store16 6($0), $4 ; NO-SIMD128-NEXT: i32.store16 4($0), $3 ; NO-SIMD128-NEXT: i32.store16 2($0), $2 ; NO-SIMD128-NEXT: i32.store16 0($0), $9 -; NO-SIMD128-NEXT: i32.const $push0=, 14 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store16 0($pop1), $8 -; NO-SIMD128-NEXT: i32.const $push2=, 12 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store16 0($pop3), $7 -; NO-SIMD128-NEXT: i32.const $push4=, 10 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store16 0($pop5), $6 -; NO-SIMD128-NEXT: i32.const $push6=, 6 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store16 0($pop7), $4 ; NO-SIMD128-NEXT: return %res = insertelement <8 x i16> %v, i16 %x, i32 0 ret <8 x i16> %res @@ -1126,22 +970,14 @@ define <8 x i16> @shuffle_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: shuffle_v8i16: ; NO-SIMD128: .functype shuffle_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store16 14($0), $16 +; NO-SIMD128-NEXT: i32.store16 12($0), $7 +; NO-SIMD128-NEXT: i32.store16 10($0), $14 ; NO-SIMD128-NEXT: i32.store16 8($0), $5 +; NO-SIMD128-NEXT: i32.store16 6($0), $12 ; NO-SIMD128-NEXT: i32.store16 4($0), $3 ; NO-SIMD128-NEXT: i32.store16 2($0), $10 ; NO-SIMD128-NEXT: i32.store16 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 14 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store16 0($pop1), $16 -; NO-SIMD128-NEXT: i32.const $push2=, 12 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store16 0($pop3), $7 -; NO-SIMD128-NEXT: i32.const $push4=, 10 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store16 0($pop5), $14 -; NO-SIMD128-NEXT: i32.const $push6=, 6 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store16 0($pop7), $12 ; NO-SIMD128-NEXT: return %res = shufflevector <8 
x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> @@ -1158,22 +994,14 @@ define <8 x i16> @shuffle_undef_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: shuffle_undef_v8i16: ; NO-SIMD128: .functype shuffle_undef_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store16 14($0), $2 +; NO-SIMD128-NEXT: i32.store16 12($0), $2 +; NO-SIMD128-NEXT: i32.store16 10($0), $2 ; NO-SIMD128-NEXT: i32.store16 8($0), $2 +; NO-SIMD128-NEXT: i32.store16 6($0), $2 ; NO-SIMD128-NEXT: i32.store16 4($0), $2 ; NO-SIMD128-NEXT: i32.store16 2($0), $2 ; NO-SIMD128-NEXT: i32.store16 0($0), $2 -; NO-SIMD128-NEXT: i32.const $push0=, 14 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store16 0($pop1), $2 -; NO-SIMD128-NEXT: i32.const $push2=, 12 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store16 0($pop3), $2 -; NO-SIMD128-NEXT: i32.const $push4=, 10 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store16 0($pop5), $2 -; NO-SIMD128-NEXT: i32.const $push6=, 6 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store16 0($pop7), $2 ; NO-SIMD128-NEXT: return %res = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, @@ -1198,22 +1026,14 @@ define <8 x i16> @build_v8i16(i16 %x0, i16 %x1, i16 %x2, i16 %x3, ; NO-SIMD128-LABEL: build_v8i16: ; NO-SIMD128: .functype build_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store16 14($0), $8 +; NO-SIMD128-NEXT: i32.store16 12($0), $7 +; NO-SIMD128-NEXT: i32.store16 10($0), $6 ; NO-SIMD128-NEXT: i32.store16 8($0), $5 +; NO-SIMD128-NEXT: i32.store16 6($0), $4 ; NO-SIMD128-NEXT: i32.store16 4($0), $3 ; NO-SIMD128-NEXT: i32.store16 2($0), $2 ; NO-SIMD128-NEXT: i32.store16 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 
14 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store16 0($pop1), $8 -; NO-SIMD128-NEXT: i32.const $push2=, 12 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store16 0($pop3), $7 -; NO-SIMD128-NEXT: i32.const $push4=, 10 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store16 0($pop5), $6 -; NO-SIMD128-NEXT: i32.const $push6=, 6 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store16 0($pop7), $4 ; NO-SIMD128-NEXT: return i16 %x4, i16 %x5, i16 %x6, i16 %x7) { %t0 = insertelement <8 x i16> undef, i16 %x0, i32 0 @@ -1258,12 +1078,10 @@ define <4 x i32> @splat_v4i32(i32 %x) { ; NO-SIMD128-LABEL: splat_v4i32: ; NO-SIMD128: .functype splat_v4i32 (i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store 12($0), $1 ; NO-SIMD128-NEXT: i32.store 8($0), $1 ; NO-SIMD128-NEXT: i32.store 4($0), $1 ; NO-SIMD128-NEXT: i32.store 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store 0($pop1), $1 ; NO-SIMD128-NEXT: return %v = insertelement <4 x i32> undef, i32 %x, i32 0 %res = shufflevector <4 x i32> %v, <4 x i32> undef, @@ -1368,12 +1186,10 @@ define <4 x i32> @replace_v4i32(<4 x i32> %v, i32 %x) { ; NO-SIMD128-LABEL: replace_v4i32: ; NO-SIMD128: .functype replace_v4i32 (i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store 12($0), $4 ; NO-SIMD128-NEXT: i32.store 8($0), $5 ; NO-SIMD128-NEXT: i32.store 4($0), $2 ; NO-SIMD128-NEXT: i32.store 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store 0($pop1), $4 ; NO-SIMD128-NEXT: return %res = insertelement <4 x i32> %v, i32 %x, i32 2 ret <4 x i32> %res @@ -1433,12 +1249,10 @@ define <4 x i32> @replace_zero_v4i32(<4 x i32> %v, i32 %x) { ; NO-SIMD128-LABEL: replace_zero_v4i32: ; NO-SIMD128: .functype replace_zero_v4i32 (i32, i32, i32, i32, 
i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store 12($0), $4 ; NO-SIMD128-NEXT: i32.store 8($0), $3 ; NO-SIMD128-NEXT: i32.store 4($0), $2 ; NO-SIMD128-NEXT: i32.store 0($0), $5 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store 0($pop1), $4 ; NO-SIMD128-NEXT: return %res = insertelement <4 x i32> %v, i32 %x, i32 0 ret <4 x i32> %res @@ -1454,12 +1268,10 @@ define <4 x i32> @shuffle_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: shuffle_v4i32: ; NO-SIMD128: .functype shuffle_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store 12($0), $8 ; NO-SIMD128-NEXT: i32.store 8($0), $3 ; NO-SIMD128-NEXT: i32.store 4($0), $6 ; NO-SIMD128-NEXT: i32.store 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store 0($pop1), $8 ; NO-SIMD128-NEXT: return %res = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 2, i32 7> @@ -1476,12 +1288,10 @@ define <4 x i32> @shuffle_undef_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: shuffle_undef_v4i32: ; NO-SIMD128: .functype shuffle_undef_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store 12($0), $2 ; NO-SIMD128-NEXT: i32.store 8($0), $2 ; NO-SIMD128-NEXT: i32.store 4($0), $2 ; NO-SIMD128-NEXT: i32.store 0($0), $2 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store 0($pop1), $2 ; NO-SIMD128-NEXT: return %res = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> @@ -1501,12 +1311,10 @@ define <4 x i32> @build_v4i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { ; NO-SIMD128-LABEL: build_v4i32: ; NO-SIMD128: .functype build_v4i32 (i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store 
12($0), $4 ; NO-SIMD128-NEXT: i32.store 8($0), $3 ; NO-SIMD128-NEXT: i32.store 4($0), $2 ; NO-SIMD128-NEXT: i32.store 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store 0($pop1), $4 ; NO-SIMD128-NEXT: return %t0 = insertelement <4 x i32> undef, i32 %x0, i32 0 %t1 = insertelement <4 x i32> %t0, i32 %x1, i32 1 @@ -1801,12 +1609,10 @@ define <4 x float> @splat_v4f32(float %x) { ; NO-SIMD128-LABEL: splat_v4f32: ; NO-SIMD128: .functype splat_v4f32 (i32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.store 12($0), $1 ; NO-SIMD128-NEXT: f32.store 8($0), $1 ; NO-SIMD128-NEXT: f32.store 4($0), $1 ; NO-SIMD128-NEXT: f32.store 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: f32.store 0($pop1), $1 ; NO-SIMD128-NEXT: return %v = insertelement <4 x float> undef, float %x, i32 0 %res = shufflevector <4 x float> %v, <4 x float> undef, @@ -1911,12 +1717,10 @@ define <4 x float> @replace_v4f32(<4 x float> %v, float %x) { ; NO-SIMD128-LABEL: replace_v4f32: ; NO-SIMD128: .functype replace_v4f32 (i32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.store 12($0), $4 ; NO-SIMD128-NEXT: f32.store 8($0), $5 ; NO-SIMD128-NEXT: f32.store 4($0), $2 ; NO-SIMD128-NEXT: f32.store 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: f32.store 0($pop1), $4 ; NO-SIMD128-NEXT: return %res = insertelement <4 x float> %v, float %x, i32 2 ret <4 x float> %res @@ -1976,12 +1780,10 @@ define <4 x float> @replace_zero_v4f32(<4 x float> %v, float %x) { ; NO-SIMD128-LABEL: replace_zero_v4f32: ; NO-SIMD128: .functype replace_zero_v4f32 (i32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.store 12($0), $4 ; NO-SIMD128-NEXT: f32.store 8($0), $3 ; NO-SIMD128-NEXT: f32.store 4($0), $2 ; NO-SIMD128-NEXT: f32.store 
0($0), $5 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: f32.store 0($pop1), $4 ; NO-SIMD128-NEXT: return %res = insertelement <4 x float> %v, float %x, i32 0 ret <4 x float> %res @@ -1997,12 +1799,10 @@ define <4 x float> @shuffle_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: shuffle_v4f32: ; NO-SIMD128: .functype shuffle_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.store 12($0), $8 ; NO-SIMD128-NEXT: f32.store 8($0), $3 ; NO-SIMD128-NEXT: f32.store 4($0), $6 ; NO-SIMD128-NEXT: f32.store 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: f32.store 0($pop1), $8 ; NO-SIMD128-NEXT: return %res = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 5, i32 2, i32 7> @@ -2019,12 +1819,10 @@ define <4 x float> @shuffle_undef_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: shuffle_undef_v4f32: ; NO-SIMD128: .functype shuffle_undef_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.store 12($0), $2 ; NO-SIMD128-NEXT: f32.store 8($0), $2 ; NO-SIMD128-NEXT: f32.store 4($0), $2 ; NO-SIMD128-NEXT: f32.store 0($0), $2 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: f32.store 0($pop1), $2 ; NO-SIMD128-NEXT: return %res = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> @@ -2044,12 +1842,10 @@ define <4 x float> @build_v4f32(float %x0, float %x1, float %x2, float %x3) { ; NO-SIMD128-LABEL: build_v4f32: ; NO-SIMD128: .functype build_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.store 12($0), $4 ; NO-SIMD128-NEXT: f32.store 8($0), $3 ; NO-SIMD128-NEXT: f32.store 4($0), $2 ; NO-SIMD128-NEXT: f32.store 0($0), $1 -; NO-SIMD128-NEXT: i32.const 
$push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: f32.store 0($pop1), $4 ; NO-SIMD128-NEXT: return %t0 = insertelement <4 x float> undef, float %x0, i32 0 %t1 = insertelement <4 x float> %t0, float %x1, i32 1 diff --git a/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll b/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll index 609be3b..50e736a 100644 --- a/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll +++ b/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc < %s | FileCheck %s ; Check that the shr(shl X, 56), 48) is not mistakenly turned into @@ -16,11 +17,13 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 target triple = "x86_64-unknown-linux-gnu" define i64 @foo(i64 %b) nounwind readnone { -entry: ; CHECK-LABEL: foo: -; CHECK: movsbq %dil, %rax -; CHECK: shlq $8, %rax -; CHECK: orq $1, %rax +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movsbq %dil, %rax +; CHECK-NEXT: shlq $8, %rax +; CHECK-NEXT: incq %rax +; CHECK-NEXT: retq +entry: %shl = shl i64 %b, 56 ; <i64> [#uses=1] %shr = ashr i64 %shl, 48 ; <i64> [#uses=1] %add5 = or i64 %shr, 1 ; <i64> [#uses=1] diff --git a/llvm/test/CodeGen/X86/AppendingLinkage.ll b/llvm/test/CodeGen/X86/AppendingLinkage.ll index 83bfbe8..ace5d19 100644 --- a/llvm/test/CodeGen/X86/AppendingLinkage.ll +++ b/llvm/test/CodeGen/X86/AppendingLinkage.ll @@ -1,4 +1,4 @@ ; RUN: not --crash llc < %s -mtriple=i686-- 2>&1 | FileCheck %s -; CHECK: unknown special variable +; CHECK: unknown special variable with appending linkage @foo = appending constant [1 x i32 ]zeroinitializer diff --git a/llvm/test/CodeGen/X86/combine-pavg.ll b/llvm/test/CodeGen/X86/combine-pavg.ll index 7a8ddf5..cb2d426 100644 --- a/llvm/test/CodeGen/X86/combine-pavg.ll +++ b/llvm/test/CodeGen/X86/combine-pavg.ll @@ -84,25 +84,22 @@ define <16 x i8> 
@combine_pavgw_knownbits(<8 x i16> %a0, <8 x i16> %a1, <8 x i16 define <8 x i16> @combine_pavgw_demandedelts(<8 x i16> %a0, <8 x i16> %a1) { ; SSE-LABEL: combine_pavgw_demandedelts: ; SSE: # %bb.0: -; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] -; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,12,13,12,13] ; SSE-NEXT: pavgw %xmm1, %xmm0 +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE-NEXT: retq ; ; AVX1-LABEL: combine_pavgw_demandedelts: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] -; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,12,13,12,13] ; AVX1-NEXT: vpavgw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; AVX1-NEXT: retq ; ; AVX2-LABEL: combine_pavgw_demandedelts: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1 -; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 ; AVX2-NEXT: vpavgw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 ; AVX2-NEXT: retq %s0 = shufflevector <8 x i16> %a0, <8 x i16> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> %avg = tail call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %s0, <8 x i16> %a1) diff --git a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir index 548cf24..13c9585 100644 --- a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir +++ b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir @@ -869,13 +869,13 @@ body: | $ymm0 = VSHUFPSZ256rmi $ymm0, $rdi, 1, $noreg, 0, $noreg, -24 ; CHECK: $ymm0 = VSHUFPSYrri $ymm0, $ymm1, -24 $ymm0 = VSHUFPSZ256rri $ymm0, $ymm1, -24 - ; CHECK: $ymm0 = VROUNDPDYm $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $ymm0 = VROUNDPDYmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr $ymm0 = VRNDSCALEPDZ256rmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr - ; CHECK: $ymm0 = VROUNDPDYr $ymm0, 15, 
implicit $mxcsr + ; CHECK: $ymm0 = VROUNDPDYri $ymm0, 15, implicit $mxcsr $ymm0 = VRNDSCALEPDZ256rri $ymm0, 15, implicit $mxcsr - ; CHECK: $ymm0 = VROUNDPSYm $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $ymm0 = VROUNDPSYmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr $ymm0 = VRNDSCALEPSZ256rmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr - ; CHECK: $ymm0 = VROUNDPSYr $ymm0, 15, implicit $mxcsr + ; CHECK: $ymm0 = VROUNDPSYri $ymm0, 15, implicit $mxcsr $ymm0 = VRNDSCALEPSZ256rri $ymm0, 15, implicit $mxcsr ; CHECK: $ymm0 = VPERM2F128rm $ymm0, $rip, 1, $noreg, 0, $noreg, 32 $ymm0 = VSHUFF32X4Z256rmi $ymm0, $rip, 1, $noreg, 0, $noreg, 228 @@ -1751,13 +1751,13 @@ body: | $xmm0 = VALIGNQZ128rmi $xmm0, $rip, 1, $noreg, 0, $noreg, 1 ; CHECK: $xmm0 = VPALIGNRrri $xmm0, $xmm1, 8 $xmm0 = VALIGNQZ128rri $xmm0, $xmm1, 1 - ; CHECK: $xmm0 = VROUNDPDm $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDPDmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr $xmm0 = VRNDSCALEPDZ128rmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDPDr $xmm0, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDPDri $xmm0, 15, implicit $mxcsr $xmm0 = VRNDSCALEPDZ128rri $xmm0, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDPSm $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDPSmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr $xmm0 = VRNDSCALEPSZ128rmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDPSr $xmm0, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDPSri $xmm0, 15, implicit $mxcsr $xmm0 = VRNDSCALEPSZ128rri $xmm0, 15, implicit $mxcsr RET64 @@ -2308,21 +2308,21 @@ body: | $xmm0 = VINSERTPSZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, 1 ; CHECK: $xmm0 = VINSERTPSrr $xmm0, $xmm0, 1 $xmm0 = VINSERTPSZrr $xmm0, $xmm0, 1 - ; CHECK: $xmm0 = VROUNDSDm $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSDmi $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr $xmm0 = 
VRNDSCALESDZm $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDSDr $xmm0, $xmm1, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSDri $xmm0, $xmm1, 15, implicit $mxcsr $xmm0 = VRNDSCALESDZr $xmm0, $xmm1, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDSSm $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSSmi $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr $xmm0 = VRNDSCALESSZm $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDSSr $xmm0, $xmm1, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSSri $xmm0, $xmm1, 15, implicit $mxcsr $xmm0 = VRNDSCALESSZr $xmm0, $xmm1, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDSDm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSDmi_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr $xmm0 = VRNDSCALESDZm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDSDr_Int $xmm0, $xmm1, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSDri_Int $xmm0, $xmm1, 15, implicit $mxcsr $xmm0 = VRNDSCALESDZr_Int $xmm0, $xmm1, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDSSm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSSmi_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr $xmm0 = VRNDSCALESSZm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDSSr_Int $xmm0, $xmm1, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSSri_Int $xmm0, $xmm1, 15, implicit $mxcsr $xmm0 = VRNDSCALESSZr_Int $xmm0, $xmm1, 15, implicit $mxcsr RET64 diff --git a/llvm/test/CodeGen/X86/freeze-vector.ll b/llvm/test/CodeGen/X86/freeze-vector.ll index d9ee5f0..ee7f4ae 100644 --- a/llvm/test/CodeGen/X86/freeze-vector.ll +++ b/llvm/test/CodeGen/X86/freeze-vector.ll @@ -173,16 +173,14 @@ define void @freeze_extractelement(ptr %origin0, ptr %origin1, ptr %dst) nounwin ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: vmovdqa (%edx), %xmm0 ; X86-NEXT: 
vpand (%ecx), %xmm0, %xmm0 -; X86-NEXT: vpextrb $6, %xmm0, %ecx -; X86-NEXT: movb %cl, (%eax) +; X86-NEXT: vpextrb $6, %xmm0, (%eax) ; X86-NEXT: retl ; ; X64-LABEL: freeze_extractelement: ; X64: # %bb.0: ; X64-NEXT: vmovdqa (%rdi), %xmm0 ; X64-NEXT: vpand (%rsi), %xmm0, %xmm0 -; X64-NEXT: vpextrb $6, %xmm0, %eax -; X64-NEXT: movb %al, (%rdx) +; X64-NEXT: vpextrb $6, %xmm0, (%rdx) ; X64-NEXT: retq %i0 = load <16 x i8>, ptr %origin0 %i1 = load <16 x i8>, ptr %origin1 diff --git a/llvm/test/CodeGen/X86/indirect-branch-tracking-eh2.ll b/llvm/test/CodeGen/X86/indirect-branch-tracking-eh2.ll index 64d44d9..0123431 100644 --- a/llvm/test/CodeGen/X86/indirect-branch-tracking-eh2.ll +++ b/llvm/test/CodeGen/X86/indirect-branch-tracking-eh2.ll @@ -1,59 +1,183 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple x86_64-unknown-unknown -exception-model sjlj -verify-machineinstrs=0 -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck %s --check-prefix=NUM ; RUN: llc -mtriple x86_64-unknown-unknown -exception-model sjlj -verify-machineinstrs=0 -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck %s --check-prefix=SJLJ -; NUM-COUNT-3: endbr64 - -;SJLJ: main: # @main -;SJLJ-NEXT: .Lfunc_begin0: -;SJLJ-NEXT: # %bb.0: # %entry -;SJLJ-NEXT: endbr64 -;SJLJ-NEXT: pushq %rbp -;SJLJ: callq _Unwind_SjLj_Register -;SJLJ-NEXT: .Ltmp0: -;SJLJ-NEXT: callq _Z3foov -;SJLJ-NEXT: .Ltmp1: -;SJLJ-NEXT: # %bb.1: # %invoke.cont -;SJLJ-NEXT: movl -;SJLJ-NEXT: .LBB0_7: # %return -;SJLJ: callq _Unwind_SjLj_Unregister -;SJLJ: retq -;SJLJ-NEXT: .LBB0_9: -;SJLJ-NEXT: endbr64 -;SJLJ-NEXT: movl -;SJLJ-NEXT: cmpl -;SJLJ-NEXT: jb .LBB0_10 -;SJLJ-NEXT: # %bb.11: -;SJLJ-NEXT: ud2 -;SJLJ-NEXT: .LBB0_10: -;SJLJ-NEXT: leaq .LJTI0_0(%rip), %rcx -;SJLJ-NEXT: jmpq *(%rcx,%rax,8) -;SJLJ-NEXT: .LBB0_2: # %lpad -;SJLJ-NEXT: .Ltmp2: -;SJLJ-NEXT: endbr64 -;SJLJ: jne .LBB0_4 -;SJLJ-NEXT: # %bb.3: # %catch3 -;SJLJ: callq __cxa_begin_catch -;SJLJ: jmp .LBB0_6 
-;SJLJ-NEXT: .LBB0_4: # %catch.fallthrough -;SJLJ-NEXT: cmpl -;SJLJ-NEXT: jne .LBB0_8 -;SJLJ-NEXT: # %bb.5: # %catch -;SJLJ: callq __cxa_begin_catch -;SJLJ: cmpb -;SJLJ-NEXT: .LBB0_6: # %return -;SJLJ: callq __cxa_end_catch -;SJLJ-NEXT: jmp .LBB0_7 -;SJLJ-NEXT: .LBB0_8: # %eh.resume -;SJLJ-NEXT: movl -;SJLJ-NEXT: .Lfunc_end0: -;SJLJ: .LJTI0_0: -;SJLJ-NEXT: .quad .LBB0_2 - @_ZTIi = external dso_local constant ptr @_ZTIc = external dso_local constant ptr ; Function Attrs: noinline norecurse optnone uwtable define dso_local i32 @main() #0 personality ptr @__gxx_personality_sj0 { +; NUM-LABEL: main: +; NUM: # %bb.0: # %entry +; NUM-NEXT: endbr64 +; NUM-NEXT: pushq %rbp +; NUM-NEXT: movq %rsp, %rbp +; NUM-NEXT: pushq %r15 +; NUM-NEXT: pushq %r14 +; NUM-NEXT: pushq %r13 +; NUM-NEXT: pushq %r12 +; NUM-NEXT: pushq %rbx +; NUM-NEXT: subq $120, %rsp +; NUM-NEXT: movl $0, -44(%rbp) +; NUM-NEXT: movq $__gxx_personality_sj0, -120(%rbp) +; NUM-NEXT: movq $GCC_except_table0, -112(%rbp) +; NUM-NEXT: movq %rbp, -104(%rbp) +; NUM-NEXT: movq %rsp, -88(%rbp) +; NUM-NEXT: movq $.LBB0_9, -96(%rbp) +; NUM-NEXT: movl $1, -144(%rbp) +; NUM-NEXT: leaq -152(%rbp), %rdi +; NUM-NEXT: callq _Unwind_SjLj_Register@PLT +; NUM-NEXT: .Ltmp0: +; NUM-NEXT: callq _Z3foov +; NUM-NEXT: .Ltmp1: +; NUM-NEXT: # %bb.1: # %invoke.cont +; NUM-NEXT: movl $1, -44(%rbp) +; NUM-NEXT: .LBB0_7: # %return +; NUM-NEXT: movl -44(%rbp), %ebx +; NUM-NEXT: leaq -152(%rbp), %rdi +; NUM-NEXT: callq _Unwind_SjLj_Unregister@PLT +; NUM-NEXT: movl %ebx, %eax +; NUM-NEXT: addq $120, %rsp +; NUM-NEXT: popq %rbx +; NUM-NEXT: popq %r12 +; NUM-NEXT: popq %r13 +; NUM-NEXT: popq %r14 +; NUM-NEXT: popq %r15 +; NUM-NEXT: popq %rbp +; NUM-NEXT: retq +; NUM-NEXT: .LBB0_9: +; NUM-NEXT: endbr64 +; NUM-NEXT: movl -144(%rbp), %eax +; NUM-NEXT: cmpl $1, %eax +; NUM-NEXT: jb .LBB0_10 +; NUM-NEXT: # %bb.11: +; NUM-NEXT: ud2 +; NUM-NEXT: .LBB0_10: +; NUM-NEXT: leaq .LJTI0_0(%rip), %rcx +; NUM-NEXT: jmpq *(%rcx,%rax,8) +; NUM-NEXT: .LBB0_2: # 
%lpad +; NUM-NEXT: .Ltmp2: +; NUM-NEXT: endbr64 +; NUM-NEXT: movl -140(%rbp), %ecx +; NUM-NEXT: movl -136(%rbp), %eax +; NUM-NEXT: movq %rcx, -56(%rbp) +; NUM-NEXT: movl %eax, -64(%rbp) +; NUM-NEXT: cmpl $2, %eax +; NUM-NEXT: jne .LBB0_4 +; NUM-NEXT: # %bb.3: # %catch3 +; NUM-NEXT: movq -56(%rbp), %rdi +; NUM-NEXT: movl $-1, -144(%rbp) +; NUM-NEXT: callq __cxa_begin_catch +; NUM-NEXT: movl (%rax), %eax +; NUM-NEXT: movl %eax, -60(%rbp) +; NUM-NEXT: xorl %ecx, %ecx +; NUM-NEXT: cmpl $5, %eax +; NUM-NEXT: jmp .LBB0_6 +; NUM-NEXT: .LBB0_4: # %catch.fallthrough +; NUM-NEXT: cmpl $1, %eax +; NUM-NEXT: jne .LBB0_8 +; NUM-NEXT: # %bb.5: # %catch +; NUM-NEXT: movq -56(%rbp), %rdi +; NUM-NEXT: movl $-1, -144(%rbp) +; NUM-NEXT: callq __cxa_begin_catch +; NUM-NEXT: movzbl (%rax), %eax +; NUM-NEXT: movb %al, -45(%rbp) +; NUM-NEXT: xorl %ecx, %ecx +; NUM-NEXT: cmpb $3, %al +; NUM-NEXT: .LBB0_6: # %return +; NUM-NEXT: setne %cl +; NUM-NEXT: movl %ecx, -44(%rbp) +; NUM-NEXT: movl $-1, -144(%rbp) +; NUM-NEXT: callq __cxa_end_catch +; NUM-NEXT: jmp .LBB0_7 +; NUM-NEXT: .LBB0_8: # %eh.resume +; NUM-NEXT: movl $-1, -144(%rbp) +; +; SJLJ-LABEL: main: +; SJLJ: # %bb.0: # %entry +; SJLJ-NEXT: endbr64 +; SJLJ-NEXT: pushq %rbp +; SJLJ-NEXT: movq %rsp, %rbp +; SJLJ-NEXT: pushq %r15 +; SJLJ-NEXT: pushq %r14 +; SJLJ-NEXT: pushq %r13 +; SJLJ-NEXT: pushq %r12 +; SJLJ-NEXT: pushq %rbx +; SJLJ-NEXT: subq $120, %rsp +; SJLJ-NEXT: movl $0, -44(%rbp) +; SJLJ-NEXT: movq $__gxx_personality_sj0, -120(%rbp) +; SJLJ-NEXT: movq $GCC_except_table0, -112(%rbp) +; SJLJ-NEXT: movq %rbp, -104(%rbp) +; SJLJ-NEXT: movq %rsp, -88(%rbp) +; SJLJ-NEXT: movq $.LBB0_9, -96(%rbp) +; SJLJ-NEXT: movl $1, -144(%rbp) +; SJLJ-NEXT: leaq -152(%rbp), %rdi +; SJLJ-NEXT: callq _Unwind_SjLj_Register@PLT +; SJLJ-NEXT: .Ltmp0: +; SJLJ-NEXT: callq _Z3foov +; SJLJ-NEXT: .Ltmp1: +; SJLJ-NEXT: # %bb.1: # %invoke.cont +; SJLJ-NEXT: movl $1, -44(%rbp) +; SJLJ-NEXT: .LBB0_7: # %return +; SJLJ-NEXT: movl -44(%rbp), %ebx +; SJLJ-NEXT: 
leaq -152(%rbp), %rdi +; SJLJ-NEXT: callq _Unwind_SjLj_Unregister@PLT +; SJLJ-NEXT: movl %ebx, %eax +; SJLJ-NEXT: addq $120, %rsp +; SJLJ-NEXT: popq %rbx +; SJLJ-NEXT: popq %r12 +; SJLJ-NEXT: popq %r13 +; SJLJ-NEXT: popq %r14 +; SJLJ-NEXT: popq %r15 +; SJLJ-NEXT: popq %rbp +; SJLJ-NEXT: retq +; SJLJ-NEXT: .LBB0_9: +; SJLJ-NEXT: endbr64 +; SJLJ-NEXT: movl -144(%rbp), %eax +; SJLJ-NEXT: cmpl $1, %eax +; SJLJ-NEXT: jb .LBB0_10 +; SJLJ-NEXT: # %bb.11: +; SJLJ-NEXT: ud2 +; SJLJ-NEXT: .LBB0_10: +; SJLJ-NEXT: leaq .LJTI0_0(%rip), %rcx +; SJLJ-NEXT: jmpq *(%rcx,%rax,8) +; SJLJ-NEXT: .LBB0_2: # %lpad +; SJLJ-NEXT: .Ltmp2: +; SJLJ-NEXT: endbr64 +; SJLJ-NEXT: movl -140(%rbp), %ecx +; SJLJ-NEXT: movl -136(%rbp), %eax +; SJLJ-NEXT: movq %rcx, -56(%rbp) +; SJLJ-NEXT: movl %eax, -64(%rbp) +; SJLJ-NEXT: cmpl $2, %eax +; SJLJ-NEXT: jne .LBB0_4 +; SJLJ-NEXT: # %bb.3: # %catch3 +; SJLJ-NEXT: movq -56(%rbp), %rdi +; SJLJ-NEXT: movl $-1, -144(%rbp) +; SJLJ-NEXT: callq __cxa_begin_catch +; SJLJ-NEXT: movl (%rax), %eax +; SJLJ-NEXT: movl %eax, -60(%rbp) +; SJLJ-NEXT: xorl %ecx, %ecx +; SJLJ-NEXT: cmpl $5, %eax +; SJLJ-NEXT: jmp .LBB0_6 +; SJLJ-NEXT: .LBB0_4: # %catch.fallthrough +; SJLJ-NEXT: cmpl $1, %eax +; SJLJ-NEXT: jne .LBB0_8 +; SJLJ-NEXT: # %bb.5: # %catch +; SJLJ-NEXT: movq -56(%rbp), %rdi +; SJLJ-NEXT: movl $-1, -144(%rbp) +; SJLJ-NEXT: callq __cxa_begin_catch +; SJLJ-NEXT: movzbl (%rax), %eax +; SJLJ-NEXT: movb %al, -45(%rbp) +; SJLJ-NEXT: xorl %ecx, %ecx +; SJLJ-NEXT: cmpb $3, %al +; SJLJ-NEXT: .LBB0_6: # %return +; SJLJ-NEXT: setne %cl +; SJLJ-NEXT: movl %ecx, -44(%rbp) +; SJLJ-NEXT: movl $-1, -144(%rbp) +; SJLJ-NEXT: callq __cxa_end_catch +; SJLJ-NEXT: jmp .LBB0_7 +; SJLJ-NEXT: .LBB0_8: # %eh.resume +; SJLJ-NEXT: movl $-1, -144(%rbp) entry: %retval = alloca i32, align 4 %exn.slot = alloca ptr diff --git a/llvm/test/CodeGen/X86/load-local-v3i129.ll b/llvm/test/CodeGen/X86/load-local-v3i129.ll index 8fa7ce0..eb5d172 100644 --- a/llvm/test/CodeGen/X86/load-local-v3i129.ll +++ 
b/llvm/test/CodeGen/X86/load-local-v3i129.ll @@ -12,7 +12,7 @@ define void @_start() nounwind { ; FAST-SHLD-NEXT: shrq $2, %rcx ; FAST-SHLD-NEXT: shldq $2, %rdx, %rcx ; FAST-SHLD-NEXT: andq $-4, %rax -; FAST-SHLD-NEXT: orq $1, %rax +; FAST-SHLD-NEXT: incq %rax ; FAST-SHLD-NEXT: movq %rax, -40(%rsp) ; FAST-SHLD-NEXT: movq %rcx, -32(%rsp) ; FAST-SHLD-NEXT: orq $-2, -56(%rsp) @@ -23,7 +23,7 @@ define void @_start() nounwind { ; SLOW-SHLD: # %bb.0: # %Entry ; SLOW-SHLD-NEXT: movq -40(%rsp), %rax ; SLOW-SHLD-NEXT: andq $-4, %rax -; SLOW-SHLD-NEXT: orq $1, %rax +; SLOW-SHLD-NEXT: incq %rax ; SLOW-SHLD-NEXT: movq %rax, -40(%rsp) ; SLOW-SHLD-NEXT: orq $-2, -56(%rsp) ; SLOW-SHLD-NEXT: movq $-1, -48(%rsp) diff --git a/llvm/test/CodeGen/X86/pr23664.ll b/llvm/test/CodeGen/X86/pr23664.ll index 453e5db..8179602 100644 --- a/llvm/test/CodeGen/X86/pr23664.ll +++ b/llvm/test/CodeGen/X86/pr23664.ll @@ -6,7 +6,7 @@ define i2 @f(i32 %arg) { ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-NEXT: leal (%rdi,%rdi), %eax -; CHECK-NEXT: orb $1, %al +; CHECK-NEXT: incb %al ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %trunc = trunc i32 %arg to i1 diff --git a/llvm/test/CodeGen/X86/vector-trunc-nowrap.ll b/llvm/test/CodeGen/X86/vector-trunc-nowrap.ll new file mode 100644 index 0000000..32c7e82 --- /dev/null +++ b/llvm/test/CodeGen/X86/vector-trunc-nowrap.ll @@ -0,0 +1,2213 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2-SSSE3,SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSE2-SSSE3,SSSE3 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown 
-mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST-ALL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST-PERLANE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL + +define <8 x i32> @trunc8i64_8i32_nsw(<8 x i64> %a) { +; SSE-LABEL: trunc8i64_8i32_nsw: +; SSE: # %bb.0: # %entry +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] +; SSE-NEXT: movaps %xmm2, %xmm1 +; SSE-NEXT: retq +; +; AVX1-LABEL: trunc8i64_8i32_nsw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vshufps 
{{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] +; AVX1-NEXT: retq +; +; AVX2-SLOW-LABEL: trunc8i64_8i32_nsw: +; AVX2-SLOW: # %bb.0: # %entry +; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] +; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-ALL-LABEL: trunc8i64_8i32_nsw: +; AVX2-FAST-ALL: # %bb.0: # %entry +; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7] +; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0 +; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1 +; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-FAST-ALL-NEXT: retq +; +; AVX2-FAST-PERLANE-LABEL: trunc8i64_8i32_nsw: +; AVX2-FAST-PERLANE: # %bb.0: # %entry +; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] +; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] +; AVX2-FAST-PERLANE-NEXT: retq +; +; AVX512-LABEL: trunc8i64_8i32_nsw: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512-NEXT: retq +entry: + %0 = trunc nsw <8 x i64> %a to <8 x i32> + ret <8 x i32> %0 +} + +define <8 x i32> @trunc8i64_8i32_nuw(<8 x i64> %a) { +; SSE-LABEL: trunc8i64_8i32_nuw: +; SSE: # %bb.0: # %entry +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] +; SSE-NEXT: movaps %xmm2, %xmm1 +; SSE-NEXT: retq +; +; AVX1-LABEL: trunc8i64_8i32_nuw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] +; AVX1-NEXT: retq +; +; AVX2-SLOW-LABEL: trunc8i64_8i32_nuw: +; AVX2-SLOW: # %bb.0: # %entry +; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] +; AVX2-SLOW-NEXT: vinsertf128 $1, 
%xmm1, %ymm0, %ymm0 +; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-ALL-LABEL: trunc8i64_8i32_nuw: +; AVX2-FAST-ALL: # %bb.0: # %entry +; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7] +; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0 +; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1 +; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-FAST-ALL-NEXT: retq +; +; AVX2-FAST-PERLANE-LABEL: trunc8i64_8i32_nuw: +; AVX2-FAST-PERLANE: # %bb.0: # %entry +; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] +; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] +; AVX2-FAST-PERLANE-NEXT: retq +; +; AVX512-LABEL: trunc8i64_8i32_nuw: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512-NEXT: retq +entry: + %0 = trunc nuw <8 x i64> %a to <8 x i32> + ret <8 x i32> %0 +} + +define <8 x i16> @trunc8i64_8i16_nsw(<8 x i64> %a) { +; SSE2-SSSE3-LABEL: trunc8i64_8i16_nsw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] +; SSE2-SSSE3-NEXT: pslld $16, %xmm2 +; SSE2-SSSE3-NEXT: psrad $16, %xmm2 +; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE2-SSSE3-NEXT: pslld $16, %xmm0 +; SSE2-SSSE3-NEXT: psrad $16, %xmm0 +; SSE2-SSSE3-NEXT: packssdw %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc8i64_8i16_nsw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pxor %xmm4, %xmm4 +; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7] +; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7] +; SSE41-NEXT: packusdw %xmm3, %xmm2 +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7] +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7] +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: packusdw 
%xmm2, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc8i64_8i16_nsw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [65535,65535,65535,65535] +; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc8i64_8i16_nsw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3],ymm1[4],ymm2[5,6,7],ymm1[8],ymm2[9,10,11],ymm1[12],ymm2[13,14,15] +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3],ymm0[4],ymm2[5,6,7],ymm0[8],ymm2[9,10,11],ymm0[12],ymm2[13,14,15] +; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: trunc8i64_8i16_nsw: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq +entry: + %0 = trunc nsw <8 x i64> %a to <8 x i16> + ret <8 x i16> %0 +} + +define <8 x i16> @trunc8i64_8i16_nuw(<8 x i64> %a) { +; SSE2-SSSE3-LABEL: trunc8i64_8i16_nuw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] +; SSE2-SSSE3-NEXT: pslld $16, %xmm2 +; SSE2-SSSE3-NEXT: psrad $16, %xmm2 +; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE2-SSSE3-NEXT: pslld $16, %xmm0 +; SSE2-SSSE3-NEXT: psrad $16, %xmm0 +; SSE2-SSSE3-NEXT: packssdw %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc8i64_8i16_nuw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pxor %xmm4, %xmm4 +; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7] +; SSE41-NEXT: pblendw 
{{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7] +; SSE41-NEXT: packusdw %xmm3, %xmm2 +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7] +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7] +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: packusdw %xmm2, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc8i64_8i16_nuw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [65535,65535,65535,65535] +; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc8i64_8i16_nuw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3],ymm1[4],ymm2[5,6,7],ymm1[8],ymm2[9,10,11],ymm1[12],ymm2[13,14,15] +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3],ymm0[4],ymm2[5,6,7],ymm0[8],ymm2[9,10,11],ymm0[12],ymm2[13,14,15] +; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: trunc8i64_8i16_nuw: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq +entry: + %0 = trunc nuw <8 x i64> %a to <8 x i16> + ret <8 x i16> %0 +} + +define void @trunc8i64_8i8_nsw(<8 x i64> %a) { +; SSE2-SSSE3-LABEL: trunc8i64_8i8_nsw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2 +; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm2 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1 +; 
SSE2-SSSE3-NEXT: pand %xmm4, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm0, %xmm0 +; SSE2-SSSE3-NEXT: movq %xmm0, (%rax) +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc8i64_8i8_nsw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pmovzxbq {{.*#+}} xmm4 = [255,255] +; SSE41-NEXT: pand %xmm4, %xmm3 +; SSE41-NEXT: pand %xmm4, %xmm2 +; SSE41-NEXT: packusdw %xmm3, %xmm2 +; SSE41-NEXT: pand %xmm4, %xmm1 +; SSE41-NEXT: pand %xmm4, %xmm0 +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: packusdw %xmm2, %xmm0 +; SSE41-NEXT: packuswb %xmm0, %xmm0 +; SSE41-NEXT: movq %xmm0, (%rax) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc8i64_8i8_nsw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [255,255,255,255] +; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vmovq %xmm0, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc8i64_8i8_nsw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] +; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] +; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX2-NEXT: vmovq %xmm0, (%rax) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: trunc8i64_8i8_nsw: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vpmovqb %zmm0, (%rax) +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq +entry: + %0 = trunc nsw <8 x i64> %a to <8 x i8> + 
store <8 x i8> %0, ptr undef, align 4 + ret void +} + +define void @trunc8i64_8i8_nuw(<8 x i64> %a) { +; SSE2-SSSE3-LABEL: trunc8i64_8i8_nuw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2 +; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm2 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm0, %xmm0 +; SSE2-SSSE3-NEXT: movq %xmm0, (%rax) +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc8i64_8i8_nuw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pmovzxbq {{.*#+}} xmm4 = [255,255] +; SSE41-NEXT: pand %xmm4, %xmm3 +; SSE41-NEXT: pand %xmm4, %xmm2 +; SSE41-NEXT: packusdw %xmm3, %xmm2 +; SSE41-NEXT: pand %xmm4, %xmm1 +; SSE41-NEXT: pand %xmm4, %xmm0 +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: packusdw %xmm2, %xmm0 +; SSE41-NEXT: packuswb %xmm0, %xmm0 +; SSE41-NEXT: movq %xmm0, (%rax) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc8i64_8i8_nuw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [255,255,255,255] +; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vmovq %xmm0, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc8i64_8i8_nuw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] +; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; 
AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] +; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX2-NEXT: vmovq %xmm0, (%rax) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: trunc8i64_8i8_nuw: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vpmovqb %zmm0, (%rax) +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq +entry: + %0 = trunc nuw <8 x i64> %a to <8 x i8> + store <8 x i8> %0, ptr undef, align 4 + ret void +} + +define <8 x i16> @trunc8i32_8i16_nsw(<8 x i32> %a) { +; SSE2-LABEL: trunc8i32_8i16_nsw: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: pslld $16, %xmm1 +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: pslld $16, %xmm0 +; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: packssdw %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc8i32_8i16_nsw: +; SSSE3: # %bb.0: # %entry +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; SSSE3-NEXT: pshufb %xmm2, %xmm1 +; SSSE3-NEXT: pshufb %xmm2, %xmm0 +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc8i32_8i16_nsw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pxor %xmm2, %xmm2 +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc8i32_8i16_nsw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc8i32_8i16_nsw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] +; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX2-NEXT: 
vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc8i32_8i16_nsw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc8i32_8i16_nsw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0 +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc8i32_8i16_nsw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc8i32_8i16_nsw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nsw <8 x i32> %a to <8 x i16> + ret <8 x i16> %0 +} + +define <8 x i16> @trunc8i32_8i16_nuw(<8 x i32> %a) { +; SSE2-LABEL: trunc8i32_8i16_nuw: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: pslld $16, %xmm1 +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: pslld $16, %xmm0 +; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: packssdw %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc8i32_8i16_nuw: +; SSSE3: # %bb.0: # %entry +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; SSSE3-NEXT: pshufb %xmm2, %xmm1 +; SSSE3-NEXT: pshufb %xmm2, %xmm0 +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc8i32_8i16_nuw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pxor %xmm2, %xmm2 +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc8i32_8i16_nuw: +; 
AVX1: # %bb.0: # %entry +; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc8i32_8i16_nuw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] +; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc8i32_8i16_nuw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc8i32_8i16_nuw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0 +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc8i32_8i16_nuw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc8i32_8i16_nuw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nuw <8 x i32> %a to <8 x i16> + ret <8 x i16> %0 +} + +define void @trunc8i32_8i8_nsw(<8 x i32> %a) { +; SSE2-SSSE3-LABEL: trunc8i32_8i8_nsw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1 +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm0, %xmm0 +; SSE2-SSSE3-NEXT: movq %xmm0, (%rax) +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: 
trunc8i32_8i8_nsw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = [255,255,255,255] +; SSE41-NEXT: pand %xmm2, %xmm1 +; SSE41-NEXT: pand %xmm2, %xmm0 +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: packuswb %xmm0, %xmm0 +; SSE41-NEXT: movq %xmm0, (%rax) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc8i32_8i8_nsw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vmovd {{.*#+}} xmm2 = [0,4,8,12,0,0,0,0,0,0,0,0,0,0,0,0] +; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX1-NEXT: vmovq %xmm0, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc8i32_8i8_nsw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vmovd {{.*#+}} xmm2 = [0,4,8,12,0,0,0,0,0,0,0,0,0,0,0,0] +; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX2-NEXT: vmovq %xmm0, (%rax) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc8i32_8i8_nsw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vmovq %xmm0, (%rax) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc8i32_8i8_nsw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpmovdb %ymm0, (%rax) +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc8i32_8i8_nsw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512BW-NEXT: vmovq %xmm0, (%rax) +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc8i32_8i8_nsw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: vpmovdb %ymm0, (%rax) +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc 
nsw <8 x i32> %a to <8 x i8> + store <8 x i8> %0, ptr undef, align 4 + ret void +} + +define void @trunc8i32_8i8_nuw(<8 x i32> %a) { +; SSE2-SSSE3-LABEL: trunc8i32_8i8_nuw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1 +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm0, %xmm0 +; SSE2-SSSE3-NEXT: movq %xmm0, (%rax) +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc8i32_8i8_nuw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = [255,255,255,255] +; SSE41-NEXT: pand %xmm2, %xmm1 +; SSE41-NEXT: pand %xmm2, %xmm0 +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: packuswb %xmm0, %xmm0 +; SSE41-NEXT: movq %xmm0, (%rax) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc8i32_8i8_nuw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vmovd {{.*#+}} xmm2 = [0,4,8,12,0,0,0,0,0,0,0,0,0,0,0,0] +; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX1-NEXT: vmovq %xmm0, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc8i32_8i8_nuw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vmovd {{.*#+}} xmm2 = [0,4,8,12,0,0,0,0,0,0,0,0,0,0,0,0] +; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX2-NEXT: vmovq %xmm0, (%rax) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc8i32_8i8_nuw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vmovq %xmm0, (%rax) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc8i32_8i8_nuw: +; AVX512VL: # %bb.0: # %entry +; 
AVX512VL-NEXT: vpmovdb %ymm0, (%rax) +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc8i32_8i8_nuw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512BW-NEXT: vmovq %xmm0, (%rax) +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc8i32_8i8_nuw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: vpmovdb %ymm0, (%rax) +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nuw <8 x i32> %a to <8 x i8> + store <8 x i8> %0, ptr undef, align 4 + ret void +} + +define void @trunc16i32_16i16_nsw(<16 x i32> %a) { +; SSE2-LABEL: trunc16i32_16i16_nsw: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: pslld $16, %xmm1 +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: pslld $16, %xmm0 +; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: packssdw %xmm1, %xmm0 +; SSE2-NEXT: pslld $16, %xmm3 +; SSE2-NEXT: psrad $16, %xmm3 +; SSE2-NEXT: pslld $16, %xmm2 +; SSE2-NEXT: psrad $16, %xmm2 +; SSE2-NEXT: packssdw %xmm3, %xmm2 +; SSE2-NEXT: movdqu %xmm2, (%rax) +; SSE2-NEXT: movdqu %xmm0, (%rax) +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc16i32_16i16_nsw: +; SSSE3: # %bb.0: # %entry +; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; SSSE3-NEXT: pshufb %xmm4, %xmm1 +; SSSE3-NEXT: pshufb %xmm4, %xmm0 +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSSE3-NEXT: pshufb %xmm4, %xmm3 +; SSSE3-NEXT: pshufb %xmm4, %xmm2 +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; SSSE3-NEXT: movdqu %xmm2, (%rax) +; SSSE3-NEXT: movdqu %xmm0, (%rax) +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc16i32_16i16_nsw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pxor %xmm4, %xmm4 +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1],xmm1[2],xmm4[3],xmm1[4],xmm4[5],xmm1[6],xmm4[7] +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1],xmm0[2],xmm4[3],xmm0[4],xmm4[5],xmm0[6],xmm4[7] +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; 
SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1],xmm3[2],xmm4[3],xmm3[4],xmm4[5],xmm3[6],xmm4[7] +; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1],xmm2[2],xmm4[3],xmm2[4],xmm4[5],xmm2[6],xmm4[7] +; SSE41-NEXT: packusdw %xmm3, %xmm2 +; SSE41-NEXT: movdqu %xmm2, (%rax) +; SSE41-NEXT: movdqu %xmm0, (%rax) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc16i32_16i16_nsw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535] +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vmovdqu %xmm1, (%rax) +; AVX1-NEXT: vmovdqu %xmm0, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc16i32_16i16_nsw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7],ymm1[8],ymm2[9],ymm1[10],ymm2[11],ymm1[12],ymm2[13],ymm1[14],ymm2[15] +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15] +; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: vmovdqu %ymm0, (%rax) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: trunc16i32_16i16_nsw: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vpmovdw %zmm0, (%rax) +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq +entry: + %0 = trunc nsw <16 x i32> %a to <16 x i16> + store <16 x i16> %0, ptr undef, align 4 + ret void +} + +define void @trunc16i32_16i16_nuw(<16 x i32> %a) { +; SSE2-LABEL: trunc16i32_16i16_nuw: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: pslld $16, %xmm1 +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: pslld $16, %xmm0 +; SSE2-NEXT: psrad $16, 
%xmm0 +; SSE2-NEXT: packssdw %xmm1, %xmm0 +; SSE2-NEXT: pslld $16, %xmm3 +; SSE2-NEXT: psrad $16, %xmm3 +; SSE2-NEXT: pslld $16, %xmm2 +; SSE2-NEXT: psrad $16, %xmm2 +; SSE2-NEXT: packssdw %xmm3, %xmm2 +; SSE2-NEXT: movdqu %xmm2, (%rax) +; SSE2-NEXT: movdqu %xmm0, (%rax) +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc16i32_16i16_nuw: +; SSSE3: # %bb.0: # %entry +; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; SSSE3-NEXT: pshufb %xmm4, %xmm1 +; SSSE3-NEXT: pshufb %xmm4, %xmm0 +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSSE3-NEXT: pshufb %xmm4, %xmm3 +; SSSE3-NEXT: pshufb %xmm4, %xmm2 +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; SSSE3-NEXT: movdqu %xmm2, (%rax) +; SSSE3-NEXT: movdqu %xmm0, (%rax) +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc16i32_16i16_nuw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pxor %xmm4, %xmm4 +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1],xmm1[2],xmm4[3],xmm1[4],xmm4[5],xmm1[6],xmm4[7] +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1],xmm0[2],xmm4[3],xmm0[4],xmm4[5],xmm0[6],xmm4[7] +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1],xmm3[2],xmm4[3],xmm3[4],xmm4[5],xmm3[6],xmm4[7] +; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1],xmm2[2],xmm4[3],xmm2[4],xmm4[5],xmm2[6],xmm4[7] +; SSE41-NEXT: packusdw %xmm3, %xmm2 +; SSE41-NEXT: movdqu %xmm2, (%rax) +; SSE41-NEXT: movdqu %xmm0, (%rax) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc16i32_16i16_nuw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535] +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vmovdqu %xmm1, (%rax) +; AVX1-NEXT: vmovdqu %xmm0, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: 
retq +; +; AVX2-LABEL: trunc16i32_16i16_nuw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7],ymm1[8],ymm2[9],ymm1[10],ymm2[11],ymm1[12],ymm2[13],ymm1[14],ymm2[15] +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15] +; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: vmovdqu %ymm0, (%rax) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: trunc16i32_16i16_nuw: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vpmovdw %zmm0, (%rax) +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq +entry: + %0 = trunc nuw <16 x i32> %a to <16 x i16> + store <16 x i16> %0, ptr undef, align 4 + ret void +} + +define void @trunc16i32_16i8_nsw(<16 x i32> %a) { +; SSE2-SSSE3-LABEL: trunc16i32_16i8_nsw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2 +; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm2 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax) +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc16i32_16i8_nsw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = [255,255,255,255] +; SSE41-NEXT: pand %xmm4, %xmm3 +; SSE41-NEXT: pand %xmm4, %xmm2 +; SSE41-NEXT: packusdw %xmm3, %xmm2 +; SSE41-NEXT: pand %xmm4, %xmm1 +; SSE41-NEXT: pand %xmm4, %xmm0 +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: packuswb %xmm2, %xmm0 +; SSE41-NEXT: movdqu %xmm0, (%rax) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc16i32_16i8_nsw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = 
[255,255,255,255,255,255,255,255] +; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqu %xmm0, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc16i32_16i8_nsw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] +; AVX2-NEXT: vmovdqu %xmm0, (%rax) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: trunc16i32_16i8_nsw: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vpmovdb %zmm0, (%rax) +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq +entry: + %0 = trunc nsw <16 x i32> %a to <16 x i8> + store <8 x i8> %0, ptr undef, align 4 + ret void +} + +define void @trunc16i32_16i8_nuw(<16 x i32> %a) { +; SSE2-SSSE3-LABEL: trunc16i32_16i8_nuw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2 +; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm2 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax) +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc16i32_16i8_nuw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = [255,255,255,255] +; SSE41-NEXT: pand %xmm4, %xmm3 +; SSE41-NEXT: pand %xmm4, %xmm2 +; SSE41-NEXT: packusdw %xmm3, %xmm2
+; SSE41-NEXT: pand %xmm4, %xmm1 +; SSE41-NEXT: pand %xmm4, %xmm0 +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: packuswb %xmm2, %xmm0 +; SSE41-NEXT: movdqu %xmm0, (%rax) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc16i32_16i8_nuw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255] +; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqu %xmm0, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc16i32_16i8_nuw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] +; AVX2-NEXT: vmovdqu %xmm0, (%rax) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: trunc16i32_16i8_nuw: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vpmovdb %zmm0, (%rax) +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq +entry: + %0 = trunc nuw <16 x i32> %a to <16 x i8> + store <16 x i8> %0, ptr undef, align 4 + ret void +} + +define void @trunc16i16_16i8_nsw(<16 x i16> %a) { +; SSE2-SSSE3-LABEL: trunc16i16_16i8_nsw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1 +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax) +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc16i16_16i8_nsw: +; SSE41: # %bb.0: # %entry +; 
SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; SSE41-NEXT: pand %xmm2, %xmm1 +; SSE41-NEXT: pand %xmm2, %xmm0 +; SSE41-NEXT: packuswb %xmm1, %xmm0 +; SSE41-NEXT: movdqu %xmm0, (%rax) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc16i16_16i8_nsw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqu %xmm0, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc16i16_16i8_nsw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vmovdqu %xmm0, (%rax) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc16i16_16i8_nsw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512F-NEXT: vpmovdb %zmm0, (%rax) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc16i16_16i8_nsw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VL-NEXT: vpmovdb %zmm0, (%rax) +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc16i16_16i8_nsw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-NEXT: vmovdqu %xmm0, (%rax) +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc16i16_16i8_nsw: +; AVX512BWVL: # 
%bb.0: # %entry +; AVX512BWVL-NEXT: vpmovwb %ymm0, (%rax) +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nsw <16 x i16> %a to <16 x i8> + store <16 x i8> %0, ptr undef, align 4 + ret void +} + +define void @trunc16i16_16i8_nuw(<16 x i16> %a) { +; SSE2-SSSE3-LABEL: trunc16i16_16i8_nuw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1 +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax) +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc16i16_16i8_nuw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; SSE41-NEXT: pand %xmm2, %xmm1 +; SSE41-NEXT: pand %xmm2, %xmm0 +; SSE41-NEXT: packuswb %xmm1, %xmm0 +; SSE41-NEXT: movdqu %xmm0, (%rax) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc16i16_16i8_nuw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqu %xmm0, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc16i16_16i8_nuw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vmovdqu %xmm0, (%rax) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc16i16_16i8_nuw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512F-NEXT: vpmovdb %zmm0, (%rax) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc16i16_16i8_nuw: +; AVX512VL: 
# %bb.0: # %entry +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VL-NEXT: vpmovdb %zmm0, (%rax) +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc16i16_16i8_nuw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-NEXT: vmovdqu %xmm0, (%rax) +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc16i16_16i8_nuw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: vpmovwb %ymm0, (%rax) +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nuw <16 x i16> %a to <16 x i8> + store <16 x i8> %0, ptr undef, align 4 + ret void +} + +define void @trunc32i16_32i8_nsw(<32 x i16> %a) { +; SSE2-SSSE3-LABEL: trunc32i16_32i8_nsw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2 +; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm2 +; SSE2-SSSE3-NEXT: movdqu %xmm2, (%rax) +; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax) +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc32i16_32i8_nsw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255] +; SSE41-NEXT: pand %xmm4, %xmm1 +; SSE41-NEXT: pand %xmm4, %xmm0 +; SSE41-NEXT: packuswb %xmm1, %xmm0 +; SSE41-NEXT: pand %xmm4, %xmm3 +; SSE41-NEXT: pand %xmm4, %xmm2 +; SSE41-NEXT: packuswb %xmm3, %xmm2 +; SSE41-NEXT: movdqu %xmm2, (%rax) +; SSE41-NEXT: movdqu %xmm0, (%rax) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc32i16_32i8_nsw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: 
vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vmovdqu %xmm1, (%rax) +; AVX1-NEXT: vmovdqu %xmm0, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc32i16_32i8_nsw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] +; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: vmovdqu %ymm0, (%rax) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc32i16_32i8_nsw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512F-NEXT: vpmovdb %zmm1, (%rax) +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512F-NEXT: vpmovdb %zmm0, (%rax) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc32i16_32i8_nsw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = 
ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512VL-NEXT: vpmovdb %zmm1, (%rax) +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VL-NEXT: vpmovdb %zmm0, (%rax) +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc32i16_32i8_nsw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: vpmovwb %zmm0, (%rax) +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc32i16_32i8_nsw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: vpmovwb %zmm0, (%rax) +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nsw <32 x i16> %a to <32 x i8> + store <32 x i8> %0, ptr undef, align 4 + ret void +} + +define void @trunc32i16_32i8_nuw(<32 x i16> %a) { +; SSE2-SSSE3-LABEL: trunc32i16_32i8_nuw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2 +; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm2 +; SSE2-SSSE3-NEXT: movdqu %xmm2, (%rax) +; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax) +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc32i16_32i8_nuw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255] +; SSE41-NEXT: pand %xmm4, %xmm1 +; SSE41-NEXT: pand %xmm4, %xmm0 +; SSE41-NEXT: packuswb %xmm1, %xmm0 +; SSE41-NEXT: pand %xmm4, %xmm3 +; SSE41-NEXT: pand %xmm4, %xmm2 +; SSE41-NEXT: packuswb %xmm3, %xmm2 +; SSE41-NEXT: movdqu %xmm2, 
(%rax) +; SSE41-NEXT: movdqu %xmm0, (%rax) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc32i16_32i8_nuw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vmovdqu %xmm1, (%rax) +; AVX1-NEXT: vmovdqu %xmm0, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc32i16_32i8_nuw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] +; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: vmovdqu %ymm0, (%rax) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc32i16_32i8_nuw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512F-NEXT: vpmovdb %zmm1, (%rax) +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512F-NEXT: vpmovdb %zmm0, (%rax) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc32i16_32i8_nuw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} 
zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512VL-NEXT: vpmovdb %zmm1, (%rax) +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VL-NEXT: vpmovdb %zmm0, (%rax) +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc32i16_32i8_nuw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: vpmovwb %zmm0, (%rax) +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc32i16_32i8_nuw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: vpmovwb %zmm0, (%rax) +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nuw <32 x i16> %a to <32 x i8> + store <32 x i8> %0, ptr undef, align 4 + ret void +} + +define <8 x i32> @trunc2x4i64_8i32_nsw(<4 x i64> %a, <4 x i64> %b) { +; SSE-LABEL: trunc2x4i64_8i32_nsw: +; SSE: # %bb.0: # %entry +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] +; SSE-NEXT: movaps %xmm2, %xmm1 +; SSE-NEXT: retq +; +; AVX1-LABEL: trunc2x4i64_8i32_nsw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] +; AVX1-NEXT: retq +; +; AVX2-SLOW-LABEL: trunc2x4i64_8i32_nsw: +; AVX2-SLOW: # %bb.0: # %entry +; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] +; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-ALL-LABEL: trunc2x4i64_8i32_nsw: +; 
AVX2-FAST-ALL: # %bb.0: # %entry +; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7] +; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0 +; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1 +; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-FAST-ALL-NEXT: retq +; +; AVX2-FAST-PERLANE-LABEL: trunc2x4i64_8i32_nsw: +; AVX2-FAST-PERLANE: # %bb.0: # %entry +; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] +; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] +; AVX2-FAST-PERLANE-NEXT: retq +; +; AVX512-LABEL: trunc2x4i64_8i32_nsw: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512-NEXT: retq +entry: + %0 = trunc nsw <4 x i64> %a to <4 x i32> + %1 = trunc nsw <4 x i64> %b to <4 x i32> + %2 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + ret <8 x i32> %2 +} + +define <8 x i32> @trunc2x4i64_8i32_nuw(<4 x i64> %a, <4 x i64> %b) { +; SSE-LABEL: trunc2x4i64_8i32_nuw: +; SSE: # %bb.0: # %entry +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] +; SSE-NEXT: movaps %xmm2, %xmm1 +; SSE-NEXT: retq +; +; AVX1-LABEL: trunc2x4i64_8i32_nuw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] +; AVX1-NEXT: retq +; +; AVX2-SLOW-LABEL: trunc2x4i64_8i32_nuw: +; AVX2-SLOW: # %bb.0: # %entry +; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] +; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] +; AVX2-SLOW-NEXT: retq +; +; 
AVX2-FAST-ALL-LABEL: trunc2x4i64_8i32_nuw: +; AVX2-FAST-ALL: # %bb.0: # %entry +; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7] +; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0 +; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1 +; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-FAST-ALL-NEXT: retq +; +; AVX2-FAST-PERLANE-LABEL: trunc2x4i64_8i32_nuw: +; AVX2-FAST-PERLANE: # %bb.0: # %entry +; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] +; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] +; AVX2-FAST-PERLANE-NEXT: retq +; +; AVX512-LABEL: trunc2x4i64_8i32_nuw: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512-NEXT: retq +entry: + %0 = trunc nuw <4 x i64> %a to <4 x i32> + %1 = trunc nuw <4 x i64> %b to <4 x i32> + %2 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + ret <8 x i32> %2 +} + +define <8 x i16> @trunc2x4i64_8i16_nsw(<4 x i64> %a, <4 x i64> %b) { +; SSE2-SSSE3-LABEL: trunc2x4i64_8i16_nsw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE2-SSSE3-NEXT: pslld $16, %xmm0 +; SSE2-SSSE3-NEXT: psrad $16, %xmm0 +; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] +; SSE2-SSSE3-NEXT: pslld $16, %xmm2 +; SSE2-SSSE3-NEXT: psrad $16, %xmm2 +; SSE2-SSSE3-NEXT: packssdw %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc2x4i64_8i16_nsw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pxor %xmm4, %xmm4 +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7] +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7] +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: pblendw {{.*#+}} xmm3 = 
xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7] +; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7] +; SSE41-NEXT: packusdw %xmm3, %xmm2 +; SSE41-NEXT: packusdw %xmm2, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc2x4i64_8i16_nsw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7] +; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7] +; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7] +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7] +; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc2x4i64_8i16_nsw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3],ymm1[4],ymm2[5,6,7],ymm1[8],ymm2[9,10,11],ymm1[12],ymm2[13,14,15] +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX2-NEXT: vpackusdw %xmm3, %xmm1, %xmm1 +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3],ymm0[4],ymm2[5,6,7],ymm0[8],ymm2[9,10,11],ymm0[12],ymm2[13,14,15] +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 +; AVX2-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc2x4i64_8i16_nsw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512F-NEXT: vpmovqw %zmm1, %xmm1 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc2x4i64_8i16_nsw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpmovqw 
%ymm0, %xmm0 +; AVX512VL-NEXT: vpmovqw %ymm1, %xmm1 +; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc2x4i64_8i16_nsw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512BW-NEXT: vpmovqw %zmm1, %xmm1 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc2x4i64_8i16_nsw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: vpmovqw %ymm0, %xmm0 +; AVX512BWVL-NEXT: vpmovqw %ymm1, %xmm1 +; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nsw <4 x i64> %a to <4 x i16> + %1 = trunc nsw <4 x i64> %b to <4 x i16> + %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + ret <8 x i16> %2 +} + +define <8 x i16> @trunc2x4i64_8i16_nuw(<4 x i64> %a, <4 x i64> %b) { +; SSE2-SSSE3-LABEL: trunc2x4i64_8i16_nuw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE2-SSSE3-NEXT: pslld $16, %xmm0 +; SSE2-SSSE3-NEXT: psrad $16, %xmm0 +; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] +; SSE2-SSSE3-NEXT: pslld $16, %xmm2 +; SSE2-SSSE3-NEXT: psrad $16, %xmm2 +; SSE2-SSSE3-NEXT: packssdw %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc2x4i64_8i16_nuw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pxor %xmm4, %xmm4 +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7] +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7] +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7] +; SSE41-NEXT: pblendw {{.*#+}} xmm2 = 
xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7] +; SSE41-NEXT: packusdw %xmm3, %xmm2 +; SSE41-NEXT: packusdw %xmm2, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc2x4i64_8i16_nuw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7] +; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7] +; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7] +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7] +; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc2x4i64_8i16_nuw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3],ymm1[4],ymm2[5,6,7],ymm1[8],ymm2[9,10,11],ymm1[12],ymm2[13,14,15] +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX2-NEXT: vpackusdw %xmm3, %xmm1, %xmm1 +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3],ymm0[4],ymm2[5,6,7],ymm0[8],ymm2[9,10,11],ymm0[12],ymm2[13,14,15] +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 +; AVX2-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc2x4i64_8i16_nuw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512F-NEXT: vpmovqw %zmm1, %xmm1 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc2x4i64_8i16_nuw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpmovqw %ymm0, %xmm0 +; AVX512VL-NEXT: vpmovqw %ymm1, %xmm1 +; AVX512VL-NEXT: 
vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc2x4i64_8i16_nuw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512BW-NEXT: vpmovqw %zmm1, %xmm1 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc2x4i64_8i16_nuw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: vpmovqw %ymm0, %xmm0 +; AVX512BWVL-NEXT: vpmovqw %ymm1, %xmm1 +; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nuw <4 x i64> %a to <4 x i16> + %1 = trunc nuw <4 x i64> %b to <4 x i16> + %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + ret <8 x i16> %2 +} + +define <4 x i32> @trunc2x2i64_4i32_nsw(<2 x i64> %a, <2 x i64> %b) { +; SSE-LABEL: trunc2x2i64_4i32_nsw: +; SSE: # %bb.0: # %entry +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE-NEXT: retq +; +; AVX-LABEL: trunc2x2i64_4i32_nsw: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; AVX-NEXT: retq +; +; AVX512F-LABEL: trunc2x2i64_4i32_nsw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc2x2i64_4i32_nsw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0 +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc2x2i64_4i32_nsw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc2x2i64_4i32_nsw: +; AVX512BWVL: # 
%bb.0: # %entry +; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nsw <2 x i64> %a to <2 x i32> + %1 = trunc nsw <2 x i64> %b to <2 x i32> + %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + ret <4 x i32> %2 +} + +define <4 x i32> @trunc2x2i64_4i32_nuw(<2 x i64> %a, <2 x i64> %b) { +; SSE-LABEL: trunc2x2i64_4i32_nuw: +; SSE: # %bb.0: # %entry +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE-NEXT: retq +; +; AVX-LABEL: trunc2x2i64_4i32_nuw: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; AVX-NEXT: retq +; +; AVX512F-LABEL: trunc2x2i64_4i32_nuw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc2x2i64_4i32_nuw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0 +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc2x2i64_4i32_nuw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc2x2i64_4i32_nuw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nuw <2 x i64> %a to <2 x i32> + %1 = trunc nuw <2 x i64> %b to <2 x i32> + %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + ret <4 x i32> %2 +} + +define <8 x i16> @trunc2x4i32_8i16_nsw(<4 x i32> %a, <4 x i32> %b) { +; SSE2-LABEL: trunc2x4i32_8i16_nsw: +; SSE2: # 
%bb.0: # %entry +; SSE2-NEXT: pslld $16, %xmm1 +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: pslld $16, %xmm0 +; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: packssdw %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc2x4i32_8i16_nsw: +; SSSE3: # %bb.0: # %entry +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; SSSE3-NEXT: pshufb %xmm2, %xmm1 +; SSSE3-NEXT: pshufb %xmm2, %xmm0 +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc2x4i32_8i16_nsw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pxor %xmm2, %xmm2 +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: trunc2x4i32_8i16_nsw: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] +; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] +; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX512F-LABEL: trunc2x4i32_8i16_nsw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc2x4i32_8i16_nsw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0 +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc2x4i32_8i16_nsw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512BW-NEXT: 
vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc2x4i32_8i16_nsw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nsw <4 x i32> %a to <4 x i16> + %1 = trunc nsw <4 x i32> %b to <4 x i16> + %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + ret <8 x i16> %2 +} + +define <8 x i16> @trunc2x4i32_8i16_nuw(<4 x i32> %a, <4 x i32> %b) { +; SSE2-LABEL: trunc2x4i32_8i16_nuw: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: pslld $16, %xmm1 +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: pslld $16, %xmm0 +; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: packssdw %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc2x4i32_8i16_nuw: +; SSSE3: # %bb.0: # %entry +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; SSSE3-NEXT: pshufb %xmm2, %xmm1 +; SSSE3-NEXT: pshufb %xmm2, %xmm0 +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc2x4i32_8i16_nuw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pxor %xmm2, %xmm2 +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: trunc2x4i32_8i16_nuw: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] +; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] 
+; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX512F-LABEL: trunc2x4i32_8i16_nuw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc2x4i32_8i16_nuw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0 +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc2x4i32_8i16_nuw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc2x4i32_8i16_nuw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nuw <4 x i32> %a to <4 x i16> + %1 = trunc nuw <4 x i32> %b to <4 x i16> + %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + ret <8 x i16> %2 +} + +define <32 x i8> @trunc2x16i16_32i8_nsw(<16 x i16> %a, <16 x i16> %b) { +; SSE2-SSSE3-LABEL: trunc2x16i16_32i8_nsw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3 +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm4 +; SSE2-SSSE3-NEXT: 
packuswb %xmm3, %xmm4 +; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1 +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc2x16i16_32i8_nsw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255] +; SSE41-NEXT: pand %xmm4, %xmm1 +; SSE41-NEXT: pand %xmm4, %xmm0 +; SSE41-NEXT: packuswb %xmm1, %xmm0 +; SSE41-NEXT: pand %xmm4, %xmm3 +; SSE41-NEXT: pand %xmm2, %xmm4 +; SSE41-NEXT: packuswb %xmm3, %xmm4 +; SSE41-NEXT: movdqa %xmm4, %xmm1 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc2x16i16_32i8_nsw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc2x16i16_32i8_nsw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc2x16i16_32i8_nsw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; 
AVX512F-NEXT: vpmovdb %zmm1, %xmm1 +; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc2x16i16_32i8_nsw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1 +; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc2x16i16_32i8_nsw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc2x16i16_32i8_nsw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BWVL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nsw <16 x i16> %a to <16 x i8> + %1 = trunc nsw <16 x i16> %b to <16 x i8> + %2 = shufflevector <16 x i8> %0, <16 x i8> %1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> + ret <32 x i8> %2 +} + +define <32 x i8> @trunc2x16i16_32i8_nuw(<16 x i16> %a, <16 x i16> %b) { +; SSE2-SSSE3-LABEL: trunc2x16i16_32i8_nuw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = 
[255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3 +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm4 +; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm4 +; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1 +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc2x16i16_32i8_nuw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255] +; SSE41-NEXT: pand %xmm4, %xmm1 +; SSE41-NEXT: pand %xmm4, %xmm0 +; SSE41-NEXT: packuswb %xmm1, %xmm0 +; SSE41-NEXT: pand %xmm4, %xmm3 +; SSE41-NEXT: pand %xmm2, %xmm4 +; SSE41-NEXT: packuswb %xmm3, %xmm4 +; SSE41-NEXT: movdqa %xmm4, %xmm1 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc2x16i16_32i8_nuw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc2x16i16_32i8_nuw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc2x16i16_32i8_nuw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 
+; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 +; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc2x16i16_32i8_nuw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1 +; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc2x16i16_32i8_nuw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc2x16i16_32i8_nuw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BWVL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nuw <16 x i16> %a to <16 x i8> + %1 = trunc nuw <16 x i16> %b to <16 x i8> + %2 = shufflevector <16 x i8> %0, <16 x i8> %1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 
27, i32 28, i32 29, i32 30, i32 31> + ret <32 x i8> %2 +} + +define <16 x i8> @trunc2x8i16_16i8_nsw(<8 x i16> %a, <8 x i16> %b) { +; SSE2-SSSE3-LABEL: trunc2x8i16_16i8_nsw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1 +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc2x8i16_16i8_nsw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; SSE41-NEXT: pand %xmm2, %xmm1 +; SSE41-NEXT: pand %xmm2, %xmm0 +; SSE41-NEXT: packuswb %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc2x8i16_16i8_nsw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc2x8i16_16i8_nsw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc2x8i16_16i8_nsw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpbroadcastw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX512F-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc2x8i16_16i8_nsw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512VL-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX512VL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc2x8i16_16i8_nsw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $xmm0 
killed $xmm0 def $ymm0 +; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc2x8i16_16i8_nsw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nsw <8 x i16> %a to <8 x i8> + %1 = trunc nsw <8 x i16> %b to <8 x i8> + %2 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + ret <16 x i8> %2 +} + +define <16 x i8> @trunc2x8i16_16i8_nuw(<8 x i16> %a, <8 x i16> %b) { +; SSE2-SSSE3-LABEL: trunc2x8i16_16i8_nuw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1 +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc2x8i16_16i8_nuw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; SSE41-NEXT: pand %xmm2, %xmm1 +; SSE41-NEXT: pand %xmm2, %xmm0 +; SSE41-NEXT: packuswb %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc2x8i16_16i8_nuw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc2x8i16_16i8_nuw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpackuswb %xmm1, %xmm0, 
%xmm0 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc2x8i16_16i8_nuw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpbroadcastw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX512F-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc2x8i16_16i8_nuw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512VL-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX512VL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc2x8i16_16i8_nuw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc2x8i16_16i8_nuw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nuw <8 x i16> %a to <8 x i8> + %1 = trunc nuw <8 x i16> %b to <8 x i8> + %2 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + ret <16 x i8> %2 +} + +define i64 @trunc8i16_i64_nsw(<8 x i16> %inval) { +; SSE2-LABEL: trunc8i16_i64_nsw: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: movq %xmm0, %rax +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc8i16_i64_nsw: +; SSSE3: # %bb.0: # %entry +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; SSSE3-NEXT: movq 
%xmm0, %rax +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc8i16_i64_nsw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; SSE41-NEXT: movq %xmm0, %rax +; SSE41-NEXT: retq +; +; AVX-LABEL: trunc8i16_i64_nsw: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; AVX-NEXT: vmovq %xmm0, %rax +; AVX-NEXT: retq +; +; AVX512F-LABEL: trunc8i16_i64_nsw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vmovq %xmm0, %rax +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc8i16_i64_nsw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; AVX512VL-NEXT: vmovq %xmm0, %rax +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc8i16_i64_nsw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: vmovq %xmm0, %rax +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc8i16_i64_nsw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: vpmovwb %xmm0, %xmm0 +; AVX512BWVL-NEXT: vmovq %xmm0, %rax +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nsw <8 x i16> %inval to <8 x i8> + %1 = bitcast <8 x i8> %0 to i64 + ret i64 %1 +} + +define i64 @trunc8i16_i64_nuw(<8 x i16> %inval) { +; SSE2-LABEL: trunc8i16_i64_nuw: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: movq %xmm0, %rax +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc8i16_i64_nuw: +; SSSE3: # %bb.0: # %entry +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; SSSE3-NEXT: movq %xmm0, %rax +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc8i16_i64_nuw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; SSE41-NEXT: movq %xmm0, %rax +; SSE41-NEXT: retq +; +; 
AVX-LABEL: trunc8i16_i64_nuw: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; AVX-NEXT: vmovq %xmm0, %rax +; AVX-NEXT: retq +; +; AVX512F-LABEL: trunc8i16_i64_nuw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vmovq %xmm0, %rax +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc8i16_i64_nuw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; AVX512VL-NEXT: vmovq %xmm0, %rax +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc8i16_i64_nuw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: vmovq %xmm0, %rax +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc8i16_i64_nuw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: vpmovwb %xmm0, %xmm0 +; AVX512BWVL-NEXT: vmovq %xmm0, %rax +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nuw <8 x i16> %inval to <8 x i8> + %1 = bitcast <8 x i8> %0 to i64 + ret i64 %1 +} diff --git a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll index 691ca40..f7a27a5 100644 --- a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll +++ b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll @@ -65,6 +65,7 @@ define void @load_1byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64 ; X64-NO-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca_with_zero_upper_half: ; X64-NO-BMI2: # %bb.0: ; X64-NO-BMI2-NEXT: movzwl (%rdi), %eax +; X64-NO-BMI2-NEXT: movzwl %ax, %eax ; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx ; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NO-BMI2-NEXT: shrl %cl, %eax @@ -74,6 +75,7 @@ define void @load_1byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64 ; X64-BMI2-LABEL: 
load_1byte_chunk_of_4byte_alloca_with_zero_upper_half: ; X64-BMI2: # %bb.0: ; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: movzwl %ax, %eax ; X64-BMI2-NEXT: shll $3, %esi ; X64-BMI2-NEXT: shrxl %esi, %eax, %eax ; X64-BMI2-NEXT: movb %al, (%rdx) @@ -81,14 +83,15 @@ define void @load_1byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64 ; ; X86-NO-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca_with_zero_upper_half: ; X86-NO-BMI2: # %bb.0: -; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NO-BMI2-NEXT: movzwl (%eax), %eax +; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NO-BMI2-NEXT: movzwl (%edx), %edx +; X86-NO-BMI2-NEXT: movzwl %dx, %edx ; X86-NO-BMI2-NEXT: shll $3, %ecx ; X86-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-NO-BMI2-NEXT: shrl %cl, %eax -; X86-NO-BMI2-NEXT: movb %al, (%edx) +; X86-NO-BMI2-NEXT: shrl %cl, %edx +; X86-NO-BMI2-NEXT: movb %dl, (%eax) ; X86-NO-BMI2-NEXT: retl ; ; X86-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca_with_zero_upper_half: @@ -97,6 +100,7 @@ define void @load_1byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64 ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: movzwl (%edx), %edx +; X86-BMI2-NEXT: movzwl %dx, %edx ; X86-BMI2-NEXT: shll $3, %ecx ; X86-BMI2-NEXT: shrxl %ecx, %edx, %ecx ; X86-BMI2-NEXT: movb %cl, (%eax) @@ -119,6 +123,7 @@ define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64 ; X64-NO-BMI2-LABEL: load_2byte_chunk_of_4byte_alloca_with_zero_upper_half: ; X64-NO-BMI2: # %bb.0: ; X64-NO-BMI2-NEXT: movzwl (%rdi), %eax +; X64-NO-BMI2-NEXT: movzwl %ax, %eax ; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx ; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NO-BMI2-NEXT: shrl %cl, %eax @@ -128,6 +133,7 @@ define void 
@load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64 ; X64-BMI2-LABEL: load_2byte_chunk_of_4byte_alloca_with_zero_upper_half: ; X64-BMI2: # %bb.0: ; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: movzwl %ax, %eax ; X64-BMI2-NEXT: shll $3, %esi ; X64-BMI2-NEXT: shrxl %esi, %eax, %eax ; X64-BMI2-NEXT: movw %ax, (%rdx) @@ -139,6 +145,7 @@ define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64 ; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NO-BMI2-NEXT: movzwl (%edx), %edx +; X86-NO-BMI2-NEXT: movzwl %dx, %edx ; X86-NO-BMI2-NEXT: shll $3, %ecx ; X86-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X86-NO-BMI2-NEXT: shrl %cl, %edx @@ -151,6 +158,7 @@ define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64 ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: movzwl (%edx), %edx +; X86-BMI2-NEXT: movzwl %dx, %edx ; X86-BMI2-NEXT: shll $3, %ecx ; X86-BMI2-NEXT: shrxl %ecx, %edx, %ecx ; X86-BMI2-NEXT: movw %cx, (%eax) |