aboutsummaryrefslogtreecommitdiff
path: root/llvm/test
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/Analysis/BasicAA/inttoptr_constexpr.ll49
-rw-r--r--llvm/test/Analysis/LoopInfo/annotated-parallel-simple.ll4
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll92
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpmode.mir5
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/legalize-reduce-fmul.mir31
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir4
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll16
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-vhadd.ll18
-rw-r--r--llvm/test/CodeGen/AArch64/fexplog.ll150
-rw-r--r--llvm/test/CodeGen/AArch64/fpmode.ll5
-rw-r--r--llvm/test/CodeGen/AArch64/fpow.ll30
-rw-r--r--llvm/test/CodeGen/AArch64/frem.ll30
-rw-r--r--llvm/test/CodeGen/AArch64/fsincos.ll78
-rw-r--r--llvm/test/CodeGen/AArch64/llvm.exp10.ll30
-rw-r--r--llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll486
-rw-r--r--llvm/test/CodeGen/AArch64/vecreduce-fmul.ll399
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll5
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll88
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll19
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll7
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll111
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll954
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll3204
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll66
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll4091
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll7
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll66
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll116
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll39
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll7
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll14
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll14
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll14
-rw-r--r--llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll19
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll36
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll39
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll144
-rw-r--r--llvm/test/CodeGen/PowerPC/coalescer-remat-with-undef-implicit-def-operand.mir28
-rw-r--r--llvm/test/CodeGen/PowerPC/expand-isel-to-branch.ll28
-rw-r--r--llvm/test/CodeGen/RISCV/mem.ll25
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll128
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll34
-rw-r--r--llvm/test/CodeGen/X86/addsub-constant-folding.ll36
-rw-r--r--llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll185
-rw-r--r--llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir4
-rw-r--r--llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir47
-rw-r--r--llvm/test/CodeGen/X86/or-lea.ll20
-rw-r--r--llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir348
-rw-r--r--llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-4.ll575
-rw-r--r--llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-4.ll14
-rw-r--r--llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll671
-rw-r--r--llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll160
-rw-r--r--llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll393
-rw-r--r--llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll64
-rw-r--r--llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/hwasan.ll35
-rw-r--r--llvm/test/Examples/OrcV2Examples/Inputs/argc_sub1.ll16
-rw-r--r--llvm/test/Examples/OrcV2Examples/Inputs/argc_sub1_elf.ll51
-rw-r--r--llvm/test/Examples/OrcV2Examples/lljit-with-remote-debugging.test16
-rw-r--r--llvm/test/MC/AArch64/FP8/dot.s6
-rw-r--r--llvm/test/MC/AArch64/SME2/bfadd-diagnostics.s (renamed from llvm/test/MC/AArch64/SME2p1/bfadd-diagnostics.s)2
-rw-r--r--llvm/test/MC/AArch64/SME2/bfadd.s (renamed from llvm/test/MC/AArch64/SME2p1/bfadd.s)110
-rw-r--r--llvm/test/MC/AArch64/SME2/bfclamp-diagnostics.s (renamed from llvm/test/MC/AArch64/SME2p1/bfclamp-diagnostics.s)2
-rw-r--r--llvm/test/MC/AArch64/SME2/bfclamp.s (renamed from llvm/test/MC/AArch64/SME2p1/bfclamp.s)30
-rw-r--r--llvm/test/MC/AArch64/SME2/bfmax-diagnostics.s (renamed from llvm/test/MC/AArch64/SME2p1/bfmax-diagnostics.s)2
-rw-r--r--llvm/test/MC/AArch64/SME2/bfmax.s (renamed from llvm/test/MC/AArch64/SME2p1/bfmax.s)46
-rw-r--r--llvm/test/MC/AArch64/SME2/bfmaxnm-diagnostics.s (renamed from llvm/test/MC/AArch64/SME2p1/bfmaxnm-diagnostics.s)2
-rw-r--r--llvm/test/MC/AArch64/SME2/bfmaxnm.s (renamed from llvm/test/MC/AArch64/SME2p1/bfmaxnm.s)46
-rw-r--r--llvm/test/MC/AArch64/SME2/bfmin-diagnostics.s (renamed from llvm/test/MC/AArch64/SME2p1/bfmin-diagnostics.s)2
-rw-r--r--llvm/test/MC/AArch64/SME2/bfmin.s (renamed from llvm/test/MC/AArch64/SME2p1/bfmin.s)46
-rw-r--r--llvm/test/MC/AArch64/SME2/bfminnm-diagnostics.s (renamed from llvm/test/MC/AArch64/SME2p1/bfminnm-diagnostics.s)2
-rw-r--r--llvm/test/MC/AArch64/SME2/bfminnm.s (renamed from llvm/test/MC/AArch64/SME2p1/bfminnm.s)46
-rw-r--r--llvm/test/MC/AArch64/SME2/bfmla-diagnostics.s (renamed from llvm/test/MC/AArch64/SME2p1/bfmla-diagnostics.s)2
-rw-r--r--llvm/test/MC/AArch64/SME2/bfmla.s (renamed from llvm/test/MC/AArch64/SME2p1/bfmla.s)302
-rw-r--r--llvm/test/MC/AArch64/SME2/bfmls-diagnostics.s (renamed from llvm/test/MC/AArch64/SME2p1/bfmls-diagnostics.s)2
-rw-r--r--llvm/test/MC/AArch64/SME2/bfmls.s (renamed from llvm/test/MC/AArch64/SME2p1/bfmls.s)302
-rw-r--r--llvm/test/MC/AArch64/SME2/bfmopa-diagnostics.s (renamed from llvm/test/MC/AArch64/SME2p1/bfmopa-diagnostics.s)2
-rw-r--r--llvm/test/MC/AArch64/SME2/bfmopa.s (renamed from llvm/test/MC/AArch64/SME2p1/bfmopa.s)38
-rw-r--r--llvm/test/MC/AArch64/SME2/bfmops-diagnostics.s (renamed from llvm/test/MC/AArch64/SME2p1/bfmops-diagnostics.s)2
-rw-r--r--llvm/test/MC/AArch64/SME2/bfmops.s (renamed from llvm/test/MC/AArch64/SME2p1/bfmops.s)38
-rw-r--r--llvm/test/MC/AArch64/SME2/bfsub-diagnostics.s (renamed from llvm/test/MC/AArch64/SME2p1/bfsub-diagnostics.s)2
-rw-r--r--llvm/test/MC/AArch64/SME2/bfsub.s (renamed from llvm/test/MC/AArch64/SME2p1/bfsub.s)110
-rw-r--r--llvm/test/MC/AArch64/SVE2p1/pmov.s36
-rw-r--r--llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s199
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt206
-rw-r--r--llvm/test/MC/LoongArch/Misc/cfi-advance.s27
-rw-r--r--llvm/test/MC/LoongArch/Relocations/relax-addsub.s17
-rw-r--r--llvm/test/MC/RISCV/cfi-advance.s7
-rw-r--r--llvm/test/MC/RISCV/fixups-expr.s28
-rw-r--r--llvm/test/Transforms/ConstraintElimination/sub-nuw.ll123
-rw-r--r--llvm/test/Transforms/InstCombine/and-xor-merge.ll39
-rw-r--r--llvm/test/Transforms/InstCombine/or-xor.ll39
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/gather_extract_from_vectorbuild.ll27
-rw-r--r--llvm/test/Transforms/SimplifyCFG/switch-dead-default-lookup-table.ll61
-rw-r--r--llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll56
-rw-r--r--llvm/test/Transforms/Util/add-TLI-mappings.ll26
-rw-r--r--llvm/test/tools/llvm-cxxfilt/no-params.test34
96 files changed, 7867 insertions, 7565 deletions
diff --git a/llvm/test/Analysis/BasicAA/inttoptr_constexpr.ll b/llvm/test/Analysis/BasicAA/inttoptr_constexpr.ll
new file mode 100644
index 0000000..afe3602
--- /dev/null
+++ b/llvm/test/Analysis/BasicAA/inttoptr_constexpr.ll
@@ -0,0 +1,49 @@
+; RUN: opt -passes=aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s
+
+; CHECK: NoAlias: i8* %a, i8* %gep
+define void @inttoptr_alloca() {
+ %a = alloca i8
+ %a.int = ptrtoint ptr %a to i64
+ %a.int.1 = add i64 %a.int, 1
+ %gep = getelementptr i8, ptr inttoptr (i64 -1 to ptr), i64 %a.int.1
+ %load = load i8, ptr %gep
+ store i8 1, ptr %a
+ ret void
+}
+
+; CHECK: NoAlias: i8* %a, i8* %gep
+define void @inttoptr_alloca_unknown_relation(i64 %offset) {
+ %a = alloca i8
+ %a.int = ptrtoint ptr %a to i64
+ %gep = getelementptr i8, ptr inttoptr (i64 -1 to ptr), i64 %offset
+ %load = load i8, ptr %gep
+ store i8 1, ptr %a
+ ret void
+}
+
+; CHECK: NoAlias: i8* %a, i8* %gep
+define void @inttoptr_alloca_noescape(i64 %offset) {
+ %a = alloca i8
+ %gep = getelementptr i8, ptr inttoptr (i64 -1 to ptr), i64 %offset
+ %load = load i8, ptr %gep
+ store i8 1, ptr %a
+ ret void
+}
+
+; CHECK: NoAlias: i8* %a, i8* %gep
+define void @inttoptr_noalias(ptr noalias %a) {
+ %a.int = ptrtoint ptr %a to i64
+ %a.int.1 = add i64 %a.int, 1
+ %gep = getelementptr i8, ptr inttoptr (i64 -1 to ptr), i64 %a.int.1
+ %load = load i8, ptr %gep
+ store i8 1, ptr %a
+ ret void
+}
+
+; CHECK: NoAlias: i8* %a, i8* %gep
+define void @inttoptr_noalias_noescape(ptr noalias %a, i64 %offset) {
+ %gep = getelementptr i8, ptr inttoptr (i64 -1 to ptr), i64 %offset
+ %load = load i8, ptr %gep
+ store i8 1, ptr %a
+ ret void
+}
diff --git a/llvm/test/Analysis/LoopInfo/annotated-parallel-simple.ll b/llvm/test/Analysis/LoopInfo/annotated-parallel-simple.ll
index d96ff49..24ed59c 100644
--- a/llvm/test/Analysis/LoopInfo/annotated-parallel-simple.ll
+++ b/llvm/test/Analysis/LoopInfo/annotated-parallel-simple.ll
@@ -33,5 +33,5 @@ for.end:
!7 = distinct !{!7, !9} ; LoopID
!9 = !{!"llvm.loop.parallel_accesses", !6}
-
-; CHECK: Parallel Loop
+; CHECK: Loop info for function 'func':
+; CHECK: Parallel Loop at depth 1 containing:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
index 8529dd3..4c07081 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
@@ -13,7 +13,7 @@ define i32 @val_compare_and_swap(ptr %p, i32 %cmp, i32 %new) {
; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK-NEXT: liveins: $w1, $w2, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+ ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0
; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -47,13 +47,13 @@ define i32 @val_compare_and_swap_from_load(ptr %p, i32 %cmp, ptr %pnew) {
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $w1, $x0, $x2
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w9 = LDRWui killed renamable $x2, 0, implicit-def renamable $x9, pcsections !0 :: (load (s32) from %ir.pnew)
+ ; CHECK-NEXT: renamable $w9 = LDRWui killed renamable $x2, 0, implicit-def $x9, pcsections !0 :: (load (s32) from %ir.pnew)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.cmpxchg.start:
; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0, $x9
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+ ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0
; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -93,7 +93,7 @@ define i32 @val_compare_and_swap_rel(ptr %p, i32 %cmp, i32 %new) {
; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK-NEXT: liveins: $w1, $w2, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+ ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0
; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -249,7 +249,7 @@ define i32 @fetch_and_nand(ptr %p) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+ ; CHECK-NEXT: renamable $w8 = LDXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
; CHECK-NEXT: renamable $w9 = ANDWri renamable $w8, 2, pcsections !0
; CHECK-NEXT: $w9 = ORNWrs $wzr, killed renamable $w9, 0, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STLXRW killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
@@ -302,7 +302,7 @@ define i32 @fetch_and_or(ptr %p) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w9, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+ ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
; CHECK-NEXT: $w10 = ORRWrs renamable $w8, renamable $w9, 0, pcsections !0
; CHECK-NEXT: early-clobber renamable $w11 = STLXRW killed renamable $w10, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0
@@ -735,8 +735,8 @@ define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
- ; CHECK-NEXT: $w9 = ADDWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: $w9 = ADDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -761,7 +761,7 @@ define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
; CHECK-NEXT: early-clobber renamable $w9 = STXRB renamable $w1, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w9, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -785,8 +785,8 @@ define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
- ; CHECK-NEXT: $w9 = SUBWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: $w9 = SUBWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -810,8 +810,8 @@ define i8 @atomicrmw_and_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
- ; CHECK-NEXT: $w9 = ANDWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: $w9 = ANDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -835,8 +835,8 @@ define i8 @atomicrmw_or_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
- ; CHECK-NEXT: $w9 = ORRWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: $w9 = ORRWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -860,8 +860,8 @@ define i8 @atomicrmw_xor_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
- ; CHECK-NEXT: $w9 = EORWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: $w9 = EORWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -885,10 +885,10 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 7, pcsections !0
; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 32, implicit-def $nzcv, pcsections !0
- ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 13, implicit killed $nzcv, implicit-def renamable $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 13, implicit killed $nzcv, implicit-def $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -912,10 +912,10 @@ define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 7, pcsections !0
; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 32, implicit-def $nzcv, pcsections !0
- ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, implicit-def renamable $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, implicit-def $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -940,10 +940,10 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w9, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
; CHECK-NEXT: renamable $w10 = ANDWri renamable $w8, 7
; CHECK-NEXT: $wzr = SUBSWrs renamable $w10, renamable $w9, 0, implicit-def $nzcv, pcsections !0
- ; CHECK-NEXT: renamable $w10 = CSELWr killed renamable $w10, renamable $w9, 9, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0
+ ; CHECK-NEXT: renamable $w10 = CSELWr killed renamable $w10, renamable $w9, 9, implicit killed $nzcv, implicit-def $x10, pcsections !0
; CHECK-NEXT: early-clobber renamable $w11 = STLXRB renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -968,10 +968,10 @@ define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w9, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
; CHECK-NEXT: renamable $w10 = ANDWri renamable $w8, 7
; CHECK-NEXT: $wzr = SUBSWrs renamable $w10, renamable $w9, 0, implicit-def $nzcv, pcsections !0
- ; CHECK-NEXT: renamable $w10 = CSELWr killed renamable $w10, renamable $w9, 8, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0
+ ; CHECK-NEXT: renamable $w10 = CSELWr killed renamable $w10, renamable $w9, 8, implicit killed $nzcv, implicit-def $x10, pcsections !0
; CHECK-NEXT: early-clobber renamable $w11 = STXRB renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -995,8 +995,8 @@ define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
- ; CHECK-NEXT: $w9 = ADDWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+ ; CHECK-NEXT: $w9 = ADDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STLXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -1021,7 +1021,7 @@ define i16 @atomicrmw_xchg_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
; CHECK-NEXT: early-clobber renamable $w9 = STXRH renamable $w1, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w9, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -1045,8 +1045,8 @@ define i16 @atomicrmw_sub_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
- ; CHECK-NEXT: $w9 = SUBWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+ ; CHECK-NEXT: $w9 = SUBWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -1070,8 +1070,8 @@ define i16 @atomicrmw_and_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
- ; CHECK-NEXT: $w9 = ANDWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+ ; CHECK-NEXT: $w9 = ANDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STLXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -1095,8 +1095,8 @@ define i16 @atomicrmw_or_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
- ; CHECK-NEXT: $w9 = ORRWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+ ; CHECK-NEXT: $w9 = ORRWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STLXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -1120,8 +1120,8 @@ define i16 @atomicrmw_xor_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
- ; CHECK-NEXT: $w9 = EORWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+ ; CHECK-NEXT: $w9 = EORWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -1145,10 +1145,10 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 15, pcsections !0
; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 40, implicit-def $nzcv, pcsections !0
- ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 13, implicit killed $nzcv, implicit-def renamable $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 13, implicit killed $nzcv, implicit-def $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -1172,10 +1172,10 @@ define i16 @atomicrmw_max_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 15, pcsections !0
; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 40, implicit-def $nzcv, pcsections !0
- ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, implicit-def renamable $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, implicit-def $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STLXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -1200,10 +1200,10 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w9, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
; CHECK-NEXT: renamable $w10 = ANDWri renamable $w8, 15
; CHECK-NEXT: $wzr = SUBSWrs renamable $w10, renamable $w9, 0, implicit-def $nzcv, pcsections !0
- ; CHECK-NEXT: renamable $w10 = CSELWr killed renamable $w10, renamable $w9, 9, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0
+ ; CHECK-NEXT: renamable $w10 = CSELWr killed renamable $w10, renamable $w9, 9, implicit killed $nzcv, implicit-def $x10, pcsections !0
; CHECK-NEXT: early-clobber renamable $w11 = STLXRH renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s16) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -1228,10 +1228,10 @@ define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w9, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
; CHECK-NEXT: renamable $w10 = ANDWri renamable $w8, 15
; CHECK-NEXT: $wzr = SUBSWrs renamable $w10, renamable $w9, 0, implicit-def $nzcv, pcsections !0
- ; CHECK-NEXT: renamable $w10 = CSELWr killed renamable $w10, renamable $w9, 8, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0
+ ; CHECK-NEXT: renamable $w10 = CSELWr killed renamable $w10, renamable $w9, 8, implicit killed $nzcv, implicit-def $x10, pcsections !0
; CHECK-NEXT: early-clobber renamable $w11 = STXRH renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s16) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -1257,7 +1257,7 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.4(0x04000000)
; CHECK-NEXT: liveins: $w1, $x2, $x8
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w0 = LDXRB renamable $x8, implicit-def renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w0 = LDXRB renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
; CHECK-NEXT: renamable $w9 = ANDWri renamable $w0, 7, pcsections !0
; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 0, implicit-def $nzcv, pcsections !0
; CHECK-NEXT: Bcc 1, %bb.4, implicit killed $nzcv, pcsections !0
@@ -1300,7 +1300,7 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.4(0x04000000)
; CHECK-NEXT: liveins: $w1, $x2, $x8
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w0 = LDXRH renamable $x8, implicit-def renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w0 = LDXRH renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
; CHECK-NEXT: renamable $w9 = ANDWri renamable $w0, 15, pcsections !0
; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 8, implicit-def $nzcv, pcsections !0
; CHECK-NEXT: Bcc 1, %bb.4, implicit killed $nzcv, pcsections !0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpmode.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpmode.mir
index 14c53902..2d72e5f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpmode.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpmode.mir
@@ -79,11 +79,8 @@ body: |
; CHECK-LABEL: name: func_reset
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[C]](s64)
- ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: $x0 = COPY [[INTTOPTR]](p0)
- ; CHECK-NEXT: BL &fesetmode, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0
- ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
- ; CHECK-NEXT: RET_ReallyLR
+ ; CHECK-NEXT: TCRETURNdi &fesetmode, 0, csr_aarch64_aapcs, implicit $sp, implicit $x0
G_RESET_FPMODE
RET_ReallyLR
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-reduce-fmul.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-reduce-fmul.mir
new file mode 100644
index 0000000..9d9e96d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-reduce-fmul.mir
@@ -0,0 +1,31 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=legalizer -global-isel %s -o - | FileCheck %s
+
+---
+name: mul_2H
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $q0, $q1
+
+ ; CHECK-LABEL: name: mul_2H
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FMUL]](<4 x s32>)
+ ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(<2 x s32>) = G_FMUL [[UV]], [[UV1]]
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMUL1]](<2 x s32>)
+ ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV3]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[FMUL2]](s32)
+ ; CHECK-NEXT: $s0 = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $s0
+ %1:_(<4 x s32>) = COPY $q0
+ %2:_(<4 x s32>) = COPY $q1
+ %0:_(<8 x s32>) = G_CONCAT_VECTORS %1(<4 x s32>), %2(<4 x s32>)
+ %5:_(s32) = nnan ninf nsz arcp contract afn reassoc G_VECREDUCE_FMUL %0(<8 x s32>)
+ $s0 = COPY %5(s32)
+ RET_ReallyLR implicit $s0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index 178db85..866fe6f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -736,8 +736,8 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_VECREDUCE_FMUL (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
-# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
-# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_VECREDUCE_FMAX (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll
index f853c08..ac08825 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --function icmp_i8_shift_and_cmp --version 4
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
define i32 @icmp_eq_imm(i32 %a) nounwind ssp {
@@ -257,3 +258,18 @@ entry:
%conv2 = zext i1 %cmp to i32
ret i32 %conv2
}
+
+define i32 @icmp_i8_shift_and_cmp(i8 %a, i8 %b) {
+entry:
+; CHECK-LABEL: icmp_i8_shift_and_cmp:
+; CHECK: ubfiz [[REG1:w[0-9]+]], w0, #3, #5
+; CHECK-NEXT: sxtb [[REG0:w[0-9]+]], w1
+; CHECK-NEXT: cmp [[REG0]], [[REG1]], sxtb
+; CHECK-NEXT: cset [[REG:w[0-9]+]], eq
+; CHECK-NEXT: and w0, [[REG]], #0x1
+ %op = shl i8 %a, 3
+ %cmp = icmp eq i8 %b, %op
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
diff --git a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
index e287eff..dda610e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
@@ -1392,6 +1392,24 @@ define <8 x i8> @sextmask3v8i8(<8 x i16> %src1, <8 x i8> %src2) {
ret <8 x i8> %result
}
+define <4 x i16> @ext_via_i19(<4 x i16> %a) {
+; CHECK-LABEL: ext_via_i19:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi.4s v1, #1
+; CHECK-NEXT: uaddw.4s v0, v1, v0
+; CHECK-NEXT: uhadd.4s v0, v0, v1
+; CHECK-NEXT: xtn.4h v0, v0
+; CHECK-NEXT: ret
+ %t3 = zext <4 x i16> %a to <4 x i32>
+ %t4 = add <4 x i32> %t3, <i32 1, i32 1, i32 1, i32 1>
+ %t5 = trunc <4 x i32> %t4 to <4 x i19>
+ %new0 = add <4 x i19> %t5, <i19 1, i19 1, i19 1, i19 1>
+ %new1 = lshr <4 x i19> %new0, <i19 1, i19 1, i19 1, i19 1>
+ %last = zext <4 x i19> %new1 to <4 x i32>
+ %t6 = trunc <4 x i32> %last to <4 x i16>
+ ret <4 x i16> %t6
+}
+
declare <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32>, <2 x i32>)
diff --git a/llvm/test/CodeGen/AArch64/fexplog.ll b/llvm/test/CodeGen/AArch64/fexplog.ll
index 26c0b68..2848a6b 100644
--- a/llvm/test/CodeGen/AArch64/fexplog.ll
+++ b/llvm/test/CodeGen/AArch64/fexplog.ll
@@ -3,36 +3,18 @@
; RUN: llc -mtriple=aarch64-none-eabi -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define double @exp_f64(double %a) {
-; CHECK-SD-LABEL: exp_f64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: b exp
-;
-; CHECK-GI-LABEL: exp_f64:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT: .cfi_offset w30, -16
-; CHECK-GI-NEXT: bl exp
-; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: exp_f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: b exp
entry:
%c = call double @llvm.exp.f64(double %a)
ret double %c
}
define float @exp_f32(float %a) {
-; CHECK-SD-LABEL: exp_f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: b expf
-;
-; CHECK-GI-LABEL: exp_f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT: .cfi_offset w30, -16
-; CHECK-GI-NEXT: bl expf
-; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: exp_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: b expf
entry:
%c = call float @llvm.exp.f32(float %a)
ret float %c
@@ -1280,36 +1262,18 @@ entry:
}
define double @exp2_f64(double %a) {
-; CHECK-SD-LABEL: exp2_f64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: b exp2
-;
-; CHECK-GI-LABEL: exp2_f64:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT: .cfi_offset w30, -16
-; CHECK-GI-NEXT: bl exp2
-; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: exp2_f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: b exp2
entry:
%c = call double @llvm.exp2.f64(double %a)
ret double %c
}
define float @exp2_f32(float %a) {
-; CHECK-SD-LABEL: exp2_f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: b exp2f
-;
-; CHECK-GI-LABEL: exp2_f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT: .cfi_offset w30, -16
-; CHECK-GI-NEXT: bl exp2f
-; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: exp2_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: b exp2f
entry:
%c = call float @llvm.exp2.f32(float %a)
ret float %c
@@ -2557,36 +2521,18 @@ entry:
}
define double @log_f64(double %a) {
-; CHECK-SD-LABEL: log_f64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: b log
-;
-; CHECK-GI-LABEL: log_f64:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT: .cfi_offset w30, -16
-; CHECK-GI-NEXT: bl log
-; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: log_f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: b log
entry:
%c = call double @llvm.log.f64(double %a)
ret double %c
}
define float @log_f32(float %a) {
-; CHECK-SD-LABEL: log_f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: b logf
-;
-; CHECK-GI-LABEL: log_f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT: .cfi_offset w30, -16
-; CHECK-GI-NEXT: bl logf
-; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: log_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: b logf
entry:
%c = call float @llvm.log.f32(float %a)
ret float %c
@@ -3834,36 +3780,18 @@ entry:
}
define double @log2_f64(double %a) {
-; CHECK-SD-LABEL: log2_f64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: b log2
-;
-; CHECK-GI-LABEL: log2_f64:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT: .cfi_offset w30, -16
-; CHECK-GI-NEXT: bl log2
-; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: log2_f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: b log2
entry:
%c = call double @llvm.log2.f64(double %a)
ret double %c
}
define float @log2_f32(float %a) {
-; CHECK-SD-LABEL: log2_f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: b log2f
-;
-; CHECK-GI-LABEL: log2_f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT: .cfi_offset w30, -16
-; CHECK-GI-NEXT: bl log2f
-; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: log2_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: b log2f
entry:
%c = call float @llvm.log2.f32(float %a)
ret float %c
@@ -5111,36 +5039,18 @@ entry:
}
define double @log10_f64(double %a) {
-; CHECK-SD-LABEL: log10_f64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: b log10
-;
-; CHECK-GI-LABEL: log10_f64:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT: .cfi_offset w30, -16
-; CHECK-GI-NEXT: bl log10
-; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: log10_f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: b log10
entry:
%c = call double @llvm.log10.f64(double %a)
ret double %c
}
define float @log10_f32(float %a) {
-; CHECK-SD-LABEL: log10_f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: b log10f
-;
-; CHECK-GI-LABEL: log10_f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT: .cfi_offset w30, -16
-; CHECK-GI-NEXT: bl log10f
-; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: log10_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: b log10f
entry:
%c = call float @llvm.log10.f32(float %a)
ret float %c
diff --git a/llvm/test/CodeGen/AArch64/fpmode.ll b/llvm/test/CodeGen/AArch64/fpmode.ll
index a4f2c4f..ebfb069 100644
--- a/llvm/test/CodeGen/AArch64/fpmode.ll
+++ b/llvm/test/CodeGen/AArch64/fpmode.ll
@@ -63,11 +63,8 @@ define void @func_reset_fpmode_soft() #0 {
;
; GIS-LABEL: func_reset_fpmode_soft:
; GIS: // %bb.0: // %entry
-; GIS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; GIS-NEXT: mov x0, #-1 // =0xffffffffffffffff
-; GIS-NEXT: bl fesetmode
-; GIS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; GIS-NEXT: ret
+; GIS-NEXT: b fesetmode
entry:
call void @llvm.reset.fpmode()
ret void
diff --git a/llvm/test/CodeGen/AArch64/fpow.ll b/llvm/test/CodeGen/AArch64/fpow.ll
index 7681ab5..a55c0db 100644
--- a/llvm/test/CodeGen/AArch64/fpow.ll
+++ b/llvm/test/CodeGen/AArch64/fpow.ll
@@ -3,36 +3,18 @@
; RUN: llc -mtriple=aarch64-none-eabi -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define double @pow_f64(double %a, double %b) {
-; CHECK-SD-LABEL: pow_f64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: b pow
-;
-; CHECK-GI-LABEL: pow_f64:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT: .cfi_offset w30, -16
-; CHECK-GI-NEXT: bl pow
-; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: pow_f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: b pow
entry:
%c = call double @llvm.pow.f64(double %a, double %b)
ret double %c
}
define float @pow_f32(float %a, float %b) {
-; CHECK-SD-LABEL: pow_f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: b powf
-;
-; CHECK-GI-LABEL: pow_f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT: .cfi_offset w30, -16
-; CHECK-GI-NEXT: bl powf
-; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: pow_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: b powf
entry:
%c = call float @llvm.pow.f32(float %a, float %b)
ret float %c
diff --git a/llvm/test/CodeGen/AArch64/frem.ll b/llvm/test/CodeGen/AArch64/frem.ll
index ed0f6c4..eb26128 100644
--- a/llvm/test/CodeGen/AArch64/frem.ll
+++ b/llvm/test/CodeGen/AArch64/frem.ll
@@ -5,36 +5,18 @@
; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define double @frem_f64(double %a, double %b) {
-; CHECK-SD-LABEL: frem_f64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: b fmod
-;
-; CHECK-GI-LABEL: frem_f64:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT: .cfi_offset w30, -16
-; CHECK-GI-NEXT: bl fmod
-; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: frem_f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: b fmod
entry:
%c = frem double %a, %b
ret double %c
}
define float @frem_f32(float %a, float %b) {
-; CHECK-SD-LABEL: frem_f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: b fmodf
-;
-; CHECK-GI-LABEL: frem_f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT: .cfi_offset w30, -16
-; CHECK-GI-NEXT: bl fmodf
-; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: frem_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: b fmodf
entry:
%c = frem float %a, %b
ret float %c
diff --git a/llvm/test/CodeGen/AArch64/fsincos.ll b/llvm/test/CodeGen/AArch64/fsincos.ll
index 9784b9e..aef0b2e 100644
--- a/llvm/test/CodeGen/AArch64/fsincos.ll
+++ b/llvm/test/CodeGen/AArch64/fsincos.ll
@@ -3,36 +3,18 @@
; RUN: llc -mtriple=aarch64-none-eabi -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define double @sin_f64(double %a) {
-; CHECK-SD-LABEL: sin_f64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: b sin
-;
-; CHECK-GI-LABEL: sin_f64:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT: .cfi_offset w30, -16
-; CHECK-GI-NEXT: bl sin
-; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: sin_f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: b sin
entry:
%c = call double @llvm.sin.f64(double %a)
ret double %c
}
define float @sin_f32(float %a) {
-; CHECK-SD-LABEL: sin_f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: b sinf
-;
-; CHECK-GI-LABEL: sin_f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT: .cfi_offset w30, -16
-; CHECK-GI-NEXT: bl sinf
-; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: sin_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: b sinf
entry:
%c = call float @llvm.sin.f32(float %a)
ret float %c
@@ -1280,36 +1262,18 @@ entry:
}
define double @cos_f64(double %a) {
-; CHECK-SD-LABEL: cos_f64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: b cos
-;
-; CHECK-GI-LABEL: cos_f64:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT: .cfi_offset w30, -16
-; CHECK-GI-NEXT: bl cos
-; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cos_f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: b cos
entry:
%c = call double @llvm.cos.f64(double %a)
ret double %c
}
define float @cos_f32(float %a) {
-; CHECK-SD-LABEL: cos_f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: b cosf
-;
-; CHECK-GI-LABEL: cos_f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT: .cfi_offset w30, -16
-; CHECK-GI-NEXT: bl cosf
-; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cos_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: b cosf
entry:
%c = call float @llvm.cos.f32(float %a)
ret float %c
@@ -2556,6 +2520,24 @@ entry:
ret <16 x half> %c
}
+; This is testing that we do not produce incorrect tailcall lowerings
+define i64 @donttailcall(double noundef %x, double noundef %y) {
+; CHECK-LABEL: donttailcall:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl sin
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %call = tail call double @llvm.sin.f64(double noundef %x)
+ %0 = bitcast double %call to i64
+ ret i64 %0
+}
+
+
declare <16 x half> @llvm.cos.v16f16(<16 x half>)
declare <16 x half> @llvm.sin.v16f16(<16 x half>)
declare <2 x double> @llvm.cos.v2f64(<2 x double>)
diff --git a/llvm/test/CodeGen/AArch64/llvm.exp10.ll b/llvm/test/CodeGen/AArch64/llvm.exp10.ll
index c3f14b0..8e6b15c 100644
--- a/llvm/test/CodeGen/AArch64/llvm.exp10.ll
+++ b/llvm/test/CodeGen/AArch64/llvm.exp10.ll
@@ -298,18 +298,9 @@ define <4 x half> @exp10_v4f16(<4 x half> %x) {
}
define float @exp10_f32(float %x) {
-; SDAG-LABEL: exp10_f32:
-; SDAG: // %bb.0:
-; SDAG-NEXT: b exp10f
-;
-; GISEL-LABEL: exp10_f32:
-; GISEL: // %bb.0:
-; GISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; GISEL-NEXT: .cfi_def_cfa_offset 16
-; GISEL-NEXT: .cfi_offset w30, -16
-; GISEL-NEXT: bl exp10f
-; GISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; GISEL-NEXT: ret
+; CHECK-LABEL: exp10_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: b exp10f
%r = call float @llvm.exp10.f32(float %x)
ret float %r
}
@@ -541,18 +532,9 @@ define <4 x float> @exp10_v4f32(<4 x float> %x) {
}
define double @exp10_f64(double %x) {
-; SDAG-LABEL: exp10_f64:
-; SDAG: // %bb.0:
-; SDAG-NEXT: b exp10
-;
-; GISEL-LABEL: exp10_f64:
-; GISEL: // %bb.0:
-; GISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; GISEL-NEXT: .cfi_def_cfa_offset 16
-; GISEL-NEXT: .cfi_offset w30, -16
-; GISEL-NEXT: bl exp10
-; GISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; GISEL-NEXT: ret
+; CHECK-LABEL: exp10_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: b exp10
%r = call double @llvm.exp10.f64(double %x)
ret double %r
}
diff --git a/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll b/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll
index a202bfb..1f7584b 100644
--- a/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll
+++ b/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll
@@ -1,13 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -o - -mtriple=arm64e-apple-macosx -aarch64-min-jump-table-entries=2 %s | FileCheck %s
+; RUN: llc -o - -mtriple=arm64e-apple-macosx %s | FileCheck %s
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+; x16 is not available, shrink-wrapping cannot happen because
+; StoreSwiftAsyncContext needs it.
define swifttailcc void @test_async_with_jumptable_x16_clobbered(ptr %src, ptr swiftasync %as) #0 {
; CHECK-LABEL: test_async_with_jumptable_x16_clobbered:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: orr x29, x29, #0x1000000000000000
-; CHECK-NEXT: str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-NEXT: add x16, sp, #8
; CHECK-NEXT: movk x16, #49946, lsl #48
@@ -18,83 +20,52 @@ define swifttailcc void @test_async_with_jumptable_x16_clobbered(ptr %src, ptr s
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: .cfi_offset w19, -32
+; CHECK-NEXT: mov x20, x22
+; CHECK-NEXT: mov x22, x0
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: mov x20, x22
-; CHECK-NEXT: mov x22, x0
-; CHECK-NEXT: Lloh0:
-; CHECK-NEXT: adrp x9, LJTI0_0@PAGE
-; CHECK-NEXT: Lloh1:
-; CHECK-NEXT: add x9, x9, LJTI0_0@PAGEOFF
-; CHECK-NEXT: Ltmp0:
-; CHECK-NEXT: adr x10, Ltmp0
-; CHECK-NEXT: ldrsw x11, [x9, x8, lsl #2]
-; CHECK-NEXT: add x10, x10, x11
-; CHECK-NEXT: mov x19, x20
-; CHECK-NEXT: br x10
-; CHECK-NEXT: LBB0_1: ; %then.2
-; CHECK-NEXT: mov x19, #0 ; =0x0
-; CHECK-NEXT: b LBB0_3
-; CHECK-NEXT: LBB0_2: ; %then.3
-; CHECK-NEXT: mov x19, x22
-; CHECK-NEXT: LBB0_3: ; %exit
+; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: cbnz x8, LBB0_2
+; CHECK-NEXT: ; %bb.1: ; %then.1
+; CHECK-NEXT: str xzr, [x22]
+; CHECK-NEXT: mov x0, x22
+; CHECK-NEXT: LBB0_2: ; %exit
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: bl _foo
-; CHECK-NEXT: mov x2, x0
-; CHECK-NEXT: mov x0, x19
-; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: mov x1, x0
+; CHECK-NEXT: mov x0, x20
; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
-; CHECK-NEXT: ldr x19, [sp], #32 ; 8-byte Folded Reload
; CHECK-NEXT: and x29, x29, #0xefffffffffffffff
-; CHECK-NEXT: br x2
-; CHECK-NEXT: .loh AdrpAdd Lloh0, Lloh1
-; CHECK-NEXT: .cfi_endproc
-; CHECK-NEXT: .section __TEXT,__const
-; CHECK-NEXT: .p2align 2, 0x0
-; CHECK-NEXT: LJTI0_0:
-; CHECK-NEXT: .long LBB0_3-Ltmp0
-; CHECK-NEXT: .long LBB0_1-Ltmp0
-; CHECK-NEXT: .long LBB0_1-Ltmp0
-; CHECK-NEXT: .long LBB0_2-Ltmp0
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: br x1
entry:
%x16 = tail call i64 asm "", "={x16}"()
%l = load i64, ptr %src, align 8
- switch i64 %l, label %dead [
- i64 0, label %exit
- i64 1, label %then.1
- i64 2, label %then.2
- i64 3, label %then.3
- ]
+ %c = icmp eq i64 %l, 0
+ br i1 %c, label %then.1, label %exit
then.1:
+ store i64 0, ptr %src
br label %exit
-then.2:
- br label %exit
-
-then.3:
- br label %exit
-
-dead: ; preds = %entryresume.5
- unreachable
-
exit:
- %p = phi ptr [ %src, %then.3 ], [ null, %then.2 ], [ %as, %entry ], [ null, %then.1 ]
+ %p = phi ptr [ %src, %then.1 ], [ %as, %entry ]
tail call void asm sideeffect "", "{x16}"(i64 %x16)
- %r = call i64 @foo()
+ %r = call i64 @foo(ptr %p)
%fn = inttoptr i64 %r to ptr
- musttail call swifttailcc void %fn(ptr swiftasync %src, ptr %p, ptr %as)
+ musttail call swifttailcc void %fn(ptr swiftasync %src, ptr %as)
ret void
}
+; x17 is not available, shrink-wrapping cannot happen because
+; StoreSwiftAsyncContext needs it.
define swifttailcc void @test_async_with_jumptable_x17_clobbered(ptr %src, ptr swiftasync %as) #0 {
; CHECK-LABEL: test_async_with_jumptable_x17_clobbered:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: orr x29, x29, #0x1000000000000000
-; CHECK-NEXT: str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-NEXT: add x16, sp, #8
; CHECK-NEXT: movk x16, #49946, lsl #48
@@ -105,86 +76,61 @@ define swifttailcc void @test_async_with_jumptable_x17_clobbered(ptr %src, ptr s
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: .cfi_offset w19, -32
+; CHECK-NEXT: mov x20, x22
+; CHECK-NEXT: mov x22, x0
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: mov x20, x22
-; CHECK-NEXT: mov x22, x0
-; CHECK-NEXT: Lloh2:
-; CHECK-NEXT: adrp x9, LJTI1_0@PAGE
-; CHECK-NEXT: Lloh3:
-; CHECK-NEXT: add x9, x9, LJTI1_0@PAGEOFF
-; CHECK-NEXT: Ltmp1:
-; CHECK-NEXT: adr x10, Ltmp1
-; CHECK-NEXT: ldrsw x11, [x9, x8, lsl #2]
-; CHECK-NEXT: add x10, x10, x11
-; CHECK-NEXT: mov x19, x20
-; CHECK-NEXT: br x10
-; CHECK-NEXT: LBB1_1: ; %then.2
-; CHECK-NEXT: mov x19, #0 ; =0x0
-; CHECK-NEXT: b LBB1_3
-; CHECK-NEXT: LBB1_2: ; %then.3
-; CHECK-NEXT: mov x19, x22
-; CHECK-NEXT: LBB1_3: ; %exit
+; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: cbnz x8, LBB1_2
+; CHECK-NEXT: ; %bb.1: ; %then.1
+; CHECK-NEXT: str xzr, [x22]
+; CHECK-NEXT: mov x0, x22
+; CHECK-NEXT: LBB1_2: ; %exit
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: bl _foo
-; CHECK-NEXT: mov x2, x0
-; CHECK-NEXT: mov x0, x19
-; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: mov x1, x0
+; CHECK-NEXT: mov x0, x20
; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
-; CHECK-NEXT: ldr x19, [sp], #32 ; 8-byte Folded Reload
; CHECK-NEXT: and x29, x29, #0xefffffffffffffff
-; CHECK-NEXT: br x2
-; CHECK-NEXT: .loh AdrpAdd Lloh2, Lloh3
-; CHECK-NEXT: .cfi_endproc
-; CHECK-NEXT: .section __TEXT,__const
-; CHECK-NEXT: .p2align 2, 0x0
-; CHECK-NEXT: LJTI1_0:
-; CHECK-NEXT: .long LBB1_3-Ltmp1
-; CHECK-NEXT: .long LBB1_1-Ltmp1
-; CHECK-NEXT: .long LBB1_1-Ltmp1
-; CHECK-NEXT: .long LBB1_2-Ltmp1
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: br x1
entry:
%x17 = tail call i64 asm "", "={x17}"()
%l = load i64, ptr %src, align 8
- switch i64 %l, label %dead [
- i64 0, label %exit
- i64 1, label %then.1
- i64 2, label %then.2
- i64 3, label %then.3
- ]
+ %c = icmp eq i64 %l, 0
+ br i1 %c, label %then.1, label %exit
then.1:
+ store i64 0, ptr %src
br label %exit
-then.2:
- br label %exit
-
-then.3:
- br label %exit
-
-dead: ; preds = %entryresume.5
- unreachable
-
exit:
- %p = phi ptr [ %src, %then.3 ], [ null, %then.2 ], [ %as, %entry ], [ null, %then.1 ]
+ %p = phi ptr [ %src, %then.1 ], [ %as, %entry ]
tail call void asm sideeffect "", "{x17}"(i64 %x17)
- %r = call i64 @foo()
+ %r = call i64 @foo(ptr %p)
%fn = inttoptr i64 %r to ptr
- musttail call swifttailcc void %fn(ptr swiftasync %src, ptr %p, ptr %as)
+ musttail call swifttailcc void %fn(ptr swiftasync %src, ptr %as)
ret void
}
define swifttailcc void @test_async_with_jumptable_x1_clobbered(ptr %src, ptr swiftasync %as) #0 {
; CHECK-LABEL: test_async_with_jumptable_x1_clobbered:
; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: mov x20, x22
+; CHECK-NEXT: mov x22, x0
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: cbnz x8, LBB2_2
+; CHECK-NEXT: ; %bb.1: ; %then.1
+; CHECK-NEXT: str xzr, [x22]
+; CHECK-NEXT: mov x0, x22
+; CHECK-NEXT: LBB2_2: ; %exit
; CHECK-NEXT: orr x29, x29, #0x1000000000000000
-; CHECK-NEXT: str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-NEXT: add x16, sp, #8
; CHECK-NEXT: movk x16, #49946, lsl #48
@@ -195,85 +141,52 @@ define swifttailcc void @test_async_with_jumptable_x1_clobbered(ptr %src, ptr sw
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: .cfi_offset w19, -32
-; CHECK-NEXT: mov x20, x22
-; CHECK-NEXT: mov x22, x0
-; CHECK-NEXT: Lloh4:
-; CHECK-NEXT: adrp x9, LJTI2_0@PAGE
-; CHECK-NEXT: Lloh5:
-; CHECK-NEXT: add x9, x9, LJTI2_0@PAGEOFF
-; CHECK-NEXT: Ltmp2:
-; CHECK-NEXT: adr x10, Ltmp2
-; CHECK-NEXT: ldrsw x11, [x9, x8, lsl #2]
-; CHECK-NEXT: add x10, x10, x11
-; CHECK-NEXT: mov x19, x20
-; CHECK-NEXT: br x10
-; CHECK-NEXT: LBB2_1: ; %then.2
-; CHECK-NEXT: mov x19, #0 ; =0x0
-; CHECK-NEXT: b LBB2_3
-; CHECK-NEXT: LBB2_2: ; %then.3
-; CHECK-NEXT: mov x19, x22
-; CHECK-NEXT: LBB2_3: ; %exit
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: bl _foo
-; CHECK-NEXT: mov x2, x0
-; CHECK-NEXT: mov x0, x19
-; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: mov x1, x0
+; CHECK-NEXT: mov x0, x20
; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
-; CHECK-NEXT: ldr x19, [sp], #32 ; 8-byte Folded Reload
; CHECK-NEXT: and x29, x29, #0xefffffffffffffff
-; CHECK-NEXT: br x2
-; CHECK-NEXT: .loh AdrpAdd Lloh4, Lloh5
-; CHECK-NEXT: .cfi_endproc
-; CHECK-NEXT: .section __TEXT,__const
-; CHECK-NEXT: .p2align 2, 0x0
-; CHECK-NEXT: LJTI2_0:
-; CHECK-NEXT: .long LBB2_3-Ltmp2
-; CHECK-NEXT: .long LBB2_1-Ltmp2
-; CHECK-NEXT: .long LBB2_1-Ltmp2
-; CHECK-NEXT: .long LBB2_2-Ltmp2
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: br x1
entry:
%x1 = tail call i64 asm "", "={x1}"()
%l = load i64, ptr %src, align 8
- switch i64 %l, label %dead [
- i64 0, label %exit
- i64 1, label %then.1
- i64 2, label %then.2
- i64 3, label %then.3
- ]
+ %c = icmp eq i64 %l, 0
+ br i1 %c, label %then.1, label %exit
then.1:
+ store i64 0, ptr %src
br label %exit
-then.2:
- br label %exit
-
-then.3:
- br label %exit
-
-dead: ; preds = %entryresume.5
- unreachable
-
exit:
- %p = phi ptr [ %src, %then.3 ], [ null, %then.2 ], [ %as, %entry ], [ null, %then.1 ]
+ %p = phi ptr [ %src, %then.1 ], [ %as, %entry ]
tail call void asm sideeffect "", "{x1}"(i64 %x1)
- %r = call i64 @foo()
+ %r = call i64 @foo(ptr %p)
%fn = inttoptr i64 %r to ptr
- musttail call swifttailcc void %fn(ptr swiftasync %src, ptr %p, ptr %as)
+ musttail call swifttailcc void %fn(ptr swiftasync %src, ptr %as)
ret void
}
define swifttailcc void @test_async_with_jumptable_x1_x9_clobbered(ptr %src, ptr swiftasync %as) #0 {
; CHECK-LABEL: test_async_with_jumptable_x1_x9_clobbered:
; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: mov x20, x22
+; CHECK-NEXT: mov x22, x0
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: cbnz x8, LBB3_2
+; CHECK-NEXT: ; %bb.1: ; %then.1
+; CHECK-NEXT: str xzr, [x22]
+; CHECK-NEXT: mov x0, x22
+; CHECK-NEXT: LBB3_2: ; %exit
; CHECK-NEXT: orr x29, x29, #0x1000000000000000
-; CHECK-NEXT: str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-NEXT: add x16, sp, #8
; CHECK-NEXT: movk x16, #49946, lsl #48
@@ -284,76 +197,35 @@ define swifttailcc void @test_async_with_jumptable_x1_x9_clobbered(ptr %src, ptr
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: .cfi_offset w19, -32
-; CHECK-NEXT: mov x20, x22
-; CHECK-NEXT: mov x22, x0
-; CHECK-NEXT: Lloh6:
-; CHECK-NEXT: adrp x10, LJTI3_0@PAGE
-; CHECK-NEXT: Lloh7:
-; CHECK-NEXT: add x10, x10, LJTI3_0@PAGEOFF
-; CHECK-NEXT: Ltmp3:
-; CHECK-NEXT: adr x11, Ltmp3
-; CHECK-NEXT: ldrsw x12, [x10, x8, lsl #2]
-; CHECK-NEXT: add x11, x11, x12
-; CHECK-NEXT: mov x19, x20
-; CHECK-NEXT: br x11
-; CHECK-NEXT: LBB3_1: ; %then.2
-; CHECK-NEXT: mov x19, #0 ; =0x0
-; CHECK-NEXT: b LBB3_3
-; CHECK-NEXT: LBB3_2: ; %then.3
-; CHECK-NEXT: mov x19, x22
-; CHECK-NEXT: LBB3_3: ; %exit
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: bl _foo
-; CHECK-NEXT: mov x2, x0
-; CHECK-NEXT: mov x0, x19
-; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: mov x1, x0
+; CHECK-NEXT: mov x0, x20
; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
-; CHECK-NEXT: ldr x19, [sp], #32 ; 8-byte Folded Reload
; CHECK-NEXT: and x29, x29, #0xefffffffffffffff
-; CHECK-NEXT: br x2
-; CHECK-NEXT: .loh AdrpAdd Lloh6, Lloh7
-; CHECK-NEXT: .cfi_endproc
-; CHECK-NEXT: .section __TEXT,__const
-; CHECK-NEXT: .p2align 2, 0x0
-; CHECK-NEXT: LJTI3_0:
-; CHECK-NEXT: .long LBB3_3-Ltmp3
-; CHECK-NEXT: .long LBB3_1-Ltmp3
-; CHECK-NEXT: .long LBB3_1-Ltmp3
-; CHECK-NEXT: .long LBB3_2-Ltmp3
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: br x1
entry:
%x1 = tail call i64 asm "", "={x1}"()
%x9 = tail call i64 asm "", "={x9}"()
%l = load i64, ptr %src, align 8
- switch i64 %l, label %dead [
- i64 0, label %exit
- i64 1, label %then.1
- i64 2, label %then.2
- i64 3, label %then.3
- ]
+ %c = icmp eq i64 %l, 0
+ br i1 %c, label %then.1, label %exit
then.1:
+ store i64 0, ptr %src
br label %exit
-then.2:
- br label %exit
-
-then.3:
- br label %exit
-
-dead: ; preds = %entryresume.5
- unreachable
-
exit:
- %p = phi ptr [ %src, %then.3 ], [ null, %then.2 ], [ %as, %entry ], [ null, %then.1 ]
+ %p = phi ptr [ %src, %then.1 ], [ %as, %entry ]
tail call void asm sideeffect "", "{x1}"(i64 %x1)
tail call void asm sideeffect "", "{x9}"(i64 %x9)
- %r = call i64 @foo()
+ %r = call i64 @foo(ptr %p)
%fn = inttoptr i64 %r to ptr
- musttail call swifttailcc void %fn(ptr swiftasync %src, ptr %p, ptr %as)
+ musttail call swifttailcc void %fn(ptr swiftasync %src, ptr %as)
ret void
}
@@ -361,23 +233,8 @@ exit:
define swifttailcc void @test_async_with_jumptable_2_available_regs_left(ptr %src, ptr swiftasync %as) #0 {
; CHECK-LABEL: test_async_with_jumptable_2_available_regs_left:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: orr x29, x29, #0x1000000000000000
-; CHECK-NEXT: str x19, [sp, #-32]! ; 8-byte Folded Spill
-; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
-; CHECK-NEXT: add x16, sp, #8
-; CHECK-NEXT: movk x16, #49946, lsl #48
-; CHECK-NEXT: mov x17, x22
-; CHECK-NEXT: pacdb x17, x16
-; CHECK-NEXT: str x17, [sp, #8]
-; CHECK-NEXT: add x29, sp, #16
-; CHECK-NEXT: .cfi_def_cfa w29, 16
-; CHECK-NEXT: .cfi_offset w30, -8
-; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: .cfi_offset w19, -32
-; CHECK-NEXT: ; InlineAsm Start
-; CHECK-NEXT: ; InlineAsm End
-; CHECK-NEXT: ; InlineAsm Start
-; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: mov x20, x22
+; CHECK-NEXT: mov x22, x0
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
@@ -404,27 +261,27 @@ define swifttailcc void @test_async_with_jumptable_2_available_regs_left(ptr %sr
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ; InlineAsm End
-; CHECK-NEXT: ldr x10, [x0]
-; CHECK-NEXT: mov x20, x22
-; CHECK-NEXT: mov x22, x0
-; CHECK-NEXT: Lloh8:
-; CHECK-NEXT: adrp x17, LJTI4_0@PAGE
-; CHECK-NEXT: Lloh9:
-; CHECK-NEXT: add x17, x17, LJTI4_0@PAGEOFF
-; CHECK-NEXT: Ltmp4:
-; CHECK-NEXT: adr x0, Ltmp4
-; CHECK-NEXT: ldrsw x19, [x17, x10, lsl #2]
-; CHECK-NEXT: add x0, x0, x19
-; CHECK-NEXT: mov x19, x20
-; CHECK-NEXT: br x0
-; CHECK-NEXT: LBB4_1: ; %then.2
-; CHECK-NEXT: mov x19, #0 ; =0x0
-; CHECK-NEXT: b LBB4_3
-; CHECK-NEXT: LBB4_2: ; %then.3
-; CHECK-NEXT: mov x19, x22
-; CHECK-NEXT: LBB4_3: ; %exit
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: ldr x10, [x22]
+; CHECK-NEXT: cbnz x10, LBB4_2
+; CHECK-NEXT: ; %bb.1: ; %then.1
+; CHECK-NEXT: str xzr, [x22]
+; CHECK-NEXT: mov x0, x22
+; CHECK-NEXT: LBB4_2: ; %exit
+; CHECK-NEXT: orr x29, x29, #0x1000000000000000
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: add x16, sp, #8
+; CHECK-NEXT: movk x16, #49946, lsl #48
+; CHECK-NEXT: mov x17, x22
+; CHECK-NEXT: pacdb x17, x16
+; CHECK-NEXT: str x17, [sp, #8]
+; CHECK-NEXT: add x29, sp, #16
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
@@ -454,22 +311,12 @@ define swifttailcc void @test_async_with_jumptable_2_available_regs_left(ptr %sr
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: bl _foo
-; CHECK-NEXT: mov x2, x0
-; CHECK-NEXT: mov x0, x19
-; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: mov x1, x0
+; CHECK-NEXT: mov x0, x20
; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
-; CHECK-NEXT: ldr x19, [sp], #32 ; 8-byte Folded Reload
; CHECK-NEXT: and x29, x29, #0xefffffffffffffff
-; CHECK-NEXT: br x2
-; CHECK-NEXT: .loh AdrpAdd Lloh8, Lloh9
-; CHECK-NEXT: .cfi_endproc
-; CHECK-NEXT: .section __TEXT,__const
-; CHECK-NEXT: .p2align 2, 0x0
-; CHECK-NEXT: LJTI4_0:
-; CHECK-NEXT: .long LBB4_3-Ltmp4
-; CHECK-NEXT: .long LBB4_1-Ltmp4
-; CHECK-NEXT: .long LBB4_1-Ltmp4
-; CHECK-NEXT: .long LBB4_2-Ltmp4
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: br x1
entry:
%x1 = tail call i64 asm "", "={x1}"()
%x2 = tail call i64 asm "", "={x2}"()
@@ -485,29 +332,16 @@ entry:
%x13 = tail call i64 asm "", "={x13}"()
%x14 = tail call i64 asm "", "={x14}"()
%x15 = tail call i64 asm "", "={x15}"()
- %x16 = tail call i64 asm "", "={x16}"()
%l = load i64, ptr %src, align 8
- switch i64 %l, label %dead [
- i64 0, label %exit
- i64 1, label %then.1
- i64 2, label %then.2
- i64 3, label %then.3
- ]
+ %c = icmp eq i64 %l, 0
+ br i1 %c, label %then.1, label %exit
then.1:
+ store i64 0, ptr %src
br label %exit
-then.2:
- br label %exit
-
-then.3:
- br label %exit
-
-dead: ; preds = %entryresume.5
- unreachable
-
exit:
- %p = phi ptr [ %src, %then.3 ], [ null, %then.2 ], [ %as, %entry ], [ null, %then.1 ]
+ %p = phi ptr [ %src, %then.1 ], [ %as, %entry ]
tail call void asm sideeffect "", "{x1}"(i64 %x1)
tail call void asm sideeffect "", "{x2}"(i64 %x2)
tail call void asm sideeffect "", "{x3}"(i64 %x3)
@@ -522,37 +356,32 @@ exit:
tail call void asm sideeffect "", "{x13}"(i64 %x13)
tail call void asm sideeffect "", "{x14}"(i64 %x14)
tail call void asm sideeffect "", "{x15}"(i64 %x15)
- tail call void asm sideeffect "", "{x16}"(i64 %x16)
- %r = call i64 @foo()
+ %r = call i64 @foo(ptr %p)
%fn = inttoptr i64 %r to ptr
- musttail call swifttailcc void %fn(ptr swiftasync %src, ptr %p, ptr %as)
+ musttail call swifttailcc void %fn(ptr swiftasync %src, ptr %as)
ret void
}
+
; There is only 1 available scratch registers left, shrink-wrapping cannot
; happen because StoreSwiftAsyncContext needs 2 free scratch registers.
define swifttailcc void @test_async_with_jumptable_1_available_reg_left(ptr %src, ptr swiftasync %as) #0 {
; CHECK-LABEL: test_async_with_jumptable_1_available_reg_left:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: orr x29, x29, #0x1000000000000000
-; CHECK-NEXT: sub sp, sp, #48
-; CHECK-NEXT: stp x21, x19, [sp, #8] ; 16-byte Folded Spill
-; CHECK-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill
-; CHECK-NEXT: add x16, sp, #24
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: add x16, sp, #8
; CHECK-NEXT: movk x16, #49946, lsl #48
; CHECK-NEXT: mov x17, x22
; CHECK-NEXT: pacdb x17, x16
-; CHECK-NEXT: str x17, [sp, #24]
-; CHECK-NEXT: add x29, sp, #32
+; CHECK-NEXT: str x17, [sp, #8]
+; CHECK-NEXT: add x29, sp, #16
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: .cfi_offset w19, -32
-; CHECK-NEXT: .cfi_offset w21, -40
-; CHECK-NEXT: ; InlineAsm Start
-; CHECK-NEXT: ; InlineAsm End
-; CHECK-NEXT: ; InlineAsm Start
-; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: mov x20, x22
+; CHECK-NEXT: mov x22, x0
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
@@ -581,27 +410,15 @@ define swifttailcc void @test_async_with_jumptable_1_available_reg_left(ptr %src
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ; InlineAsm End
-; CHECK-NEXT: ldr x10, [x0]
-; CHECK-NEXT: mov x20, x22
-; CHECK-NEXT: mov x22, x0
-; CHECK-NEXT: Lloh10:
-; CHECK-NEXT: adrp x0, LJTI5_0@PAGE
-; CHECK-NEXT: Lloh11:
-; CHECK-NEXT: add x0, x0, LJTI5_0@PAGEOFF
-; CHECK-NEXT: Ltmp5:
-; CHECK-NEXT: adr x21, Ltmp5
-; CHECK-NEXT: ldrsw x19, [x0, x10, lsl #2]
-; CHECK-NEXT: add x21, x21, x19
-; CHECK-NEXT: mov x19, x20
-; CHECK-NEXT: br x21
-; CHECK-NEXT: LBB5_1: ; %then.2
-; CHECK-NEXT: mov x19, #0 ; =0x0
-; CHECK-NEXT: b LBB5_3
-; CHECK-NEXT: LBB5_2: ; %then.3
-; CHECK-NEXT: mov x19, x22
-; CHECK-NEXT: LBB5_3: ; %exit
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: ldr x10, [x22]
+; CHECK-NEXT: cbnz x10, LBB5_2
+; CHECK-NEXT: ; %bb.1: ; %then.1
+; CHECK-NEXT: str xzr, [x22]
+; CHECK-NEXT: mov x0, x22
+; CHECK-NEXT: LBB5_2: ; %exit
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
@@ -633,23 +450,12 @@ define swifttailcc void @test_async_with_jumptable_1_available_reg_left(ptr %src
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: bl _foo
-; CHECK-NEXT: mov x2, x0
-; CHECK-NEXT: mov x0, x19
-; CHECK-NEXT: mov x1, x20
-; CHECK-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
-; CHECK-NEXT: ldp x21, x19, [sp, #8] ; 16-byte Folded Reload
+; CHECK-NEXT: mov x1, x0
+; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-NEXT: and x29, x29, #0xefffffffffffffff
-; CHECK-NEXT: add sp, sp, #48
-; CHECK-NEXT: br x2
-; CHECK-NEXT: .loh AdrpAdd Lloh10, Lloh11
-; CHECK-NEXT: .cfi_endproc
-; CHECK-NEXT: .section __TEXT,__const
-; CHECK-NEXT: .p2align 2, 0x0
-; CHECK-NEXT: LJTI5_0:
-; CHECK-NEXT: .long LBB5_3-Ltmp5
-; CHECK-NEXT: .long LBB5_1-Ltmp5
-; CHECK-NEXT: .long LBB5_1-Ltmp5
-; CHECK-NEXT: .long LBB5_2-Ltmp5
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: br x1
entry:
%x1 = tail call i64 asm "", "={x1}"()
%x2 = tail call i64 asm "", "={x2}"()
@@ -666,29 +472,16 @@ entry:
%x14 = tail call i64 asm "", "={x14}"()
%x15 = tail call i64 asm "", "={x15}"()
%x16 = tail call i64 asm "", "={x16}"()
- %x17 = tail call i64 asm "", "={x17}"()
%l = load i64, ptr %src, align 8
- switch i64 %l, label %dead [
- i64 0, label %exit
- i64 1, label %then.1
- i64 2, label %then.2
- i64 3, label %then.3
- ]
+ %c = icmp eq i64 %l, 0
+ br i1 %c, label %then.1, label %exit
then.1:
+ store i64 0, ptr %src
br label %exit
-then.2:
- br label %exit
-
-then.3:
- br label %exit
-
-dead: ; preds = %entryresume.5
- unreachable
-
exit:
- %p = phi ptr [ %src, %then.3 ], [ null, %then.2 ], [ %as, %entry ], [ null, %then.1 ]
+ %p = phi ptr [ %src, %then.1 ], [ %as, %entry ]
tail call void asm sideeffect "", "{x1}"(i64 %x1)
tail call void asm sideeffect "", "{x2}"(i64 %x2)
tail call void asm sideeffect "", "{x3}"(i64 %x3)
@@ -704,13 +497,12 @@ exit:
tail call void asm sideeffect "", "{x14}"(i64 %x14)
tail call void asm sideeffect "", "{x15}"(i64 %x15)
tail call void asm sideeffect "", "{x16}"(i64 %x16)
- tail call void asm sideeffect "", "{x17}"(i64 %x17)
- %r = call i64 @foo()
+ %r = call i64 @foo(ptr %p)
%fn = inttoptr i64 %r to ptr
- musttail call swifttailcc void %fn(ptr swiftasync %src, ptr %p, ptr %as)
+ musttail call swifttailcc void %fn(ptr swiftasync %src, ptr %as)
ret void
}
-declare i64 @foo()
+declare i64 @foo(ptr)
attributes #0 = { "frame-pointer"="non-leaf" }
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmul.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmul.ll
index ff2c5c4..67b4ebb 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmul.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmul.ll
@@ -1,13 +1,22 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD-NOFP16
-; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD-FP16
+; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16
+; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
+; RUN: llc -mtriple=aarch64-none-eabi -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
+; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
define float @mul_HalfS(<2 x float> %bin.rdx) {
-; CHECK-LABEL: mul_HalfS:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: fmul s0, s0, v0.s[1]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: mul_HalfS:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: fmul s0, s0, v0.s[1]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: mul_HalfS:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: fmul s0, s0, s1
+; CHECK-GI-NEXT: ret
%r = call fast float @llvm.vector.reduce.fmul.f32.v2f32(float 1.0, <2 x float> %bin.rdx)
ret float %r
}
@@ -40,6 +49,27 @@ define half @mul_HalfH(<4 x half> %bin.rdx) {
; CHECK-SD-FP16-NEXT: fmul h1, h1, v0.h[2]
; CHECK-SD-FP16-NEXT: fmul h0, h1, v0.h[3]
; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: mul_HalfH:
+; CHECK-GI-NOFP16: // %bb.0:
+; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NOFP16-NEXT: fmul v0.2s, v0.2s, v1.2s
+; CHECK-GI-NOFP16-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s1
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: mul_HalfH:
+; CHECK-GI-FP16: // %bb.0:
+; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
+; CHECK-GI-FP16-NEXT: fmul h1, h2, h3
+; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
+; CHECK-GI-FP16-NEXT: ret
%r = call fast half @llvm.vector.reduce.fmul.f16.v4f16(half 1.0, <4 x half> %bin.rdx)
ret half %r
}
@@ -93,17 +123,49 @@ define half @mul_H(<8 x half> %bin.rdx) {
; CHECK-SD-FP16-NEXT: fmul h1, h1, v0.h[2]
; CHECK-SD-FP16-NEXT: fmul h0, h1, v0.h[3]
; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: mul_H:
+; CHECK-GI-NOFP16: // %bb.0:
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v1.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NOFP16-NEXT: fmul v0.2s, v0.2s, v1.2s
+; CHECK-GI-NOFP16-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s1
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: mul_H:
+; CHECK-GI-FP16: // %bb.0:
+; CHECK-GI-FP16-NEXT: mov d1, v0.d[1]
+; CHECK-GI-FP16-NEXT: fmul v0.4h, v0.4h, v1.4h
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
+; CHECK-GI-FP16-NEXT: fmul h1, h2, h3
+; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
+; CHECK-GI-FP16-NEXT: ret
%r = call fast half @llvm.vector.reduce.fmul.f16.v8f16(half 1.0, <8 x half> %bin.rdx)
ret half %r
}
define float @mul_S(<4 x float> %bin.rdx) {
-; CHECK-LABEL: mul_S:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: fmul s0, s0, v0.s[1]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: mul_S:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: fmul v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: fmul s0, s0, v0.s[1]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: mul_S:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: fmul s0, s0, s1
+; CHECK-GI-NEXT: ret
%r = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %bin.rdx)
ret float %r
}
@@ -205,18 +267,56 @@ define half @mul_2H(<16 x half> %bin.rdx) {
; CHECK-SD-FP16-NEXT: fmul h1, h1, v0.h[2]
; CHECK-SD-FP16-NEXT: fmul h0, h1, v0.h[3]
; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: mul_2H:
+; CHECK-GI-NOFP16: // %bb.0:
+; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v2.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v3.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NOFP16-NEXT: fmul v0.2s, v0.2s, v1.2s
+; CHECK-GI-NOFP16-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s1
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: mul_2H:
+; CHECK-GI-FP16: // %bb.0:
+; CHECK-GI-FP16-NEXT: fmul v0.8h, v0.8h, v1.8h
+; CHECK-GI-FP16-NEXT: mov d1, v0.d[1]
+; CHECK-GI-FP16-NEXT: fmul v0.4h, v0.4h, v1.4h
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
+; CHECK-GI-FP16-NEXT: fmul h1, h2, h3
+; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
+; CHECK-GI-FP16-NEXT: ret
%r = call fast half @llvm.vector.reduce.fmul.f16.v16f16(half 1.0, <16 x half> %bin.rdx)
ret half %r
}
define float @mul_2S(<8 x float> %bin.rdx) {
-; CHECK-LABEL: mul_2S:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: fmul s0, s0, v0.s[1]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: mul_2S:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fmul v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: fmul v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: fmul s0, s0, v0.s[1]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: mul_2S:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fmul v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: fmul s0, s0, s1
+; CHECK-GI-NEXT: ret
%r = call fast float @llvm.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %bin.rdx)
ret float %r
}
@@ -233,15 +333,26 @@ define double @mul_2D(<4 x double> %bin.rdx) {
; added at least one test where the start value is not 1.0.
define float @mul_S_init_42(<4 x float> %bin.rdx) {
-; CHECK-LABEL: mul_S_init_42:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: mov w8, #1109917696 // =0x42280000
-; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: fmul s0, s0, v0.s[1]
-; CHECK-NEXT: fmul s0, s0, s1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: mul_S_init_42:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: mov w8, #1109917696 // =0x42280000
+; CHECK-SD-NEXT: fmul v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: fmov s1, w8
+; CHECK-SD-NEXT: fmul s0, s0, v0.s[1]
+; CHECK-SD-NEXT: fmul s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: mul_S_init_42:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: mov w8, #1109917696 // =0x42280000
+; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: fmul s0, s0, s1
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: fmul s0, s0, s1
+; CHECK-GI-NEXT: ret
%r = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 42.0, <4 x float> %bin.rdx)
ret float %r
}
@@ -335,6 +446,51 @@ define half @fmul_reduct_reassoc_v8f16(<8 x half> %a, <8 x half> %b) {
; CHECK-SD-FP16-NEXT: fmul h1, h1, v0.h[2]
; CHECK-SD-FP16-NEXT: fmul h0, h1, v0.h[3]
; CHECK-SD-FP16-NEXT: ret
+;
+; CHECK-GI-NOFP16-LABEL: fmul_reduct_reassoc_v8f16:
+; CHECK-GI-NOFP16: // %bb.0:
+; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v2.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v3.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NOFP16-NEXT: mov d3, v1.d[1]
+; CHECK-GI-NOFP16-NEXT: fmul v0.2s, v0.2s, v2.2s
+; CHECK-GI-NOFP16-NEXT: fmul v1.2s, v1.2s, v3.2s
+; CHECK-GI-NOFP16-NEXT: mov s2, v0.s[1]
+; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[1]
+; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s2
+; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s3
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s1
+; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: ret
+;
+; CHECK-GI-FP16-LABEL: fmul_reduct_reassoc_v8f16:
+; CHECK-GI-FP16: // %bb.0:
+; CHECK-GI-FP16-NEXT: mov d2, v0.d[1]
+; CHECK-GI-FP16-NEXT: mov d3, v1.d[1]
+; CHECK-GI-FP16-NEXT: fmul v0.4h, v0.4h, v2.4h
+; CHECK-GI-FP16-NEXT: fmul v1.4h, v1.4h, v3.4h
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[2]
+; CHECK-GI-FP16-NEXT: mov h4, v0.h[3]
+; CHECK-GI-FP16-NEXT: mov h5, v1.h[1]
+; CHECK-GI-FP16-NEXT: mov h6, v1.h[2]
+; CHECK-GI-FP16-NEXT: mov h7, v1.h[3]
+; CHECK-GI-FP16-NEXT: fmul h0, h0, h2
+; CHECK-GI-FP16-NEXT: fmul h2, h3, h4
+; CHECK-GI-FP16-NEXT: fmul h1, h1, h5
+; CHECK-GI-FP16-NEXT: fmul h3, h6, h7
+; CHECK-GI-FP16-NEXT: fmul h0, h0, h2
+; CHECK-GI-FP16-NEXT: fmul h1, h1, h3
+; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
+; CHECK-GI-FP16-NEXT: ret
%r1 = call fast half @llvm.vector.reduce.fmul.f16.v8f16(half 1.0, <8 x half> %a)
%r2 = call fast half @llvm.vector.reduce.fmul.f16.v8f16(half 1.0, <8 x half> %b)
%r = fmul fast half %r1, %r2
@@ -342,15 +498,30 @@ define half @fmul_reduct_reassoc_v8f16(<8 x half> %a, <8 x half> %b) {
}
define float @fmul_reduct_reassoc_v8f32(<8 x float> %a, <8 x float> %b) {
-; CHECK-LABEL: fmul_reduct_reassoc_v8f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fmul v2.4s, v2.4s, v3.4s
-; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: fmul v0.4s, v0.4s, v2.4s
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: fmul s0, s0, v0.s[1]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fmul_reduct_reassoc_v8f32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fmul v2.4s, v2.4s, v3.4s
+; CHECK-SD-NEXT: fmul v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: fmul v0.4s, v0.4s, v2.4s
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: fmul v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: fmul s0, s0, v0.s[1]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fmul_reduct_reassoc_v8f32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fmul v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: fmul v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: mov d3, v1.d[1]
+; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v2.2s
+; CHECK-GI-NEXT: fmul v1.2s, v1.2s, v3.2s
+; CHECK-GI-NEXT: mov s2, v0.s[1]
+; CHECK-GI-NEXT: mov s3, v1.s[1]
+; CHECK-GI-NEXT: fmul s0, s0, s2
+; CHECK-GI-NEXT: fmul s1, s1, s3
+; CHECK-GI-NEXT: fmul s0, s0, s1
+; CHECK-GI-NEXT: ret
%r1 = call fast float @llvm.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %a)
%r2 = call fast float @llvm.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %b)
%r = fmul fast float %r1, %r2
@@ -358,13 +529,26 @@ define float @fmul_reduct_reassoc_v8f32(<8 x float> %a, <8 x float> %b) {
}
define float @fmul_reduct_reassoc_v4f32(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: fmul_reduct_reassoc_v4f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: fmul s0, s0, v0.s[1]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fmul_reduct_reassoc_v4f32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fmul v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: fmul v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: fmul s0, s0, v0.s[1]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fmul_reduct_reassoc_v4f32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: mov d3, v1.d[1]
+; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v2.2s
+; CHECK-GI-NEXT: fmul v1.2s, v1.2s, v3.2s
+; CHECK-GI-NEXT: mov s2, v0.s[1]
+; CHECK-GI-NEXT: mov s3, v1.s[1]
+; CHECK-GI-NEXT: fmul s0, s0, s2
+; CHECK-GI-NEXT: fmul s1, s1, s3
+; CHECK-GI-NEXT: fmul s0, s0, s1
+; CHECK-GI-NEXT: ret
%r1 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a)
%r2 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %b)
%r = fmul fast float %r1, %r2
@@ -372,17 +556,31 @@ define float @fmul_reduct_reassoc_v4f32(<4 x float> %a, <4 x float> %b) {
}
define float @fmul_reduct_reassoc_v4f32_init(float %i, <4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: fmul_reduct_reassoc_v4f32_init:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT: fmul v1.2s, v1.2s, v3.2s
-; CHECK-NEXT: ext v3.16b, v2.16b, v2.16b, #8
-; CHECK-NEXT: fmul s1, s1, v1.s[1]
-; CHECK-NEXT: fmul v2.2s, v2.2s, v3.2s
-; CHECK-NEXT: fmul s0, s0, s1
-; CHECK-NEXT: fmul s1, s2, v2.s[1]
-; CHECK-NEXT: fmul s0, s0, s1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fmul_reduct_reassoc_v4f32_init:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ext v3.16b, v1.16b, v1.16b, #8
+; CHECK-SD-NEXT: fmul v1.2s, v1.2s, v3.2s
+; CHECK-SD-NEXT: ext v3.16b, v2.16b, v2.16b, #8
+; CHECK-SD-NEXT: fmul s1, s1, v1.s[1]
+; CHECK-SD-NEXT: fmul v2.2s, v2.2s, v3.2s
+; CHECK-SD-NEXT: fmul s0, s0, s1
+; CHECK-SD-NEXT: fmul s1, s2, v2.s[1]
+; CHECK-SD-NEXT: fmul s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fmul_reduct_reassoc_v4f32_init:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov d3, v1.d[1]
+; CHECK-GI-NEXT: fmul v1.2s, v1.2s, v3.2s
+; CHECK-GI-NEXT: mov d3, v2.d[1]
+; CHECK-GI-NEXT: mov s4, v1.s[1]
+; CHECK-GI-NEXT: fmul v2.2s, v2.2s, v3.2s
+; CHECK-GI-NEXT: fmul s1, s1, s4
+; CHECK-GI-NEXT: mov s3, v2.s[1]
+; CHECK-GI-NEXT: fmul s0, s0, s1
+; CHECK-GI-NEXT: fmul s1, s2, s3
+; CHECK-GI-NEXT: fmul s0, s0, s1
+; CHECK-GI-NEXT: ret
%r1 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float %i, <4 x float> %a)
%r2 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %b)
%r = fmul fast float %r1, %r2
@@ -390,14 +588,28 @@ define float @fmul_reduct_reassoc_v4f32_init(float %i, <4 x float> %a, <4 x floa
}
define float @fmul_reduct_reassoc_v4v8f32(<4 x float> %a, <8 x float> %b) {
-; CHECK-LABEL: fmul_reduct_reassoc_v4v8f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fmul v1.4s, v1.4s, v2.4s
-; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: fmul s0, s0, v0.s[1]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fmul_reduct_reassoc_v4v8f32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fmul v1.4s, v1.4s, v2.4s
+; CHECK-SD-NEXT: fmul v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: fmul v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: fmul s0, s0, v0.s[1]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fmul_reduct_reassoc_v4v8f32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fmul v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: mov d3, v1.d[1]
+; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v2.2s
+; CHECK-GI-NEXT: fmul v1.2s, v1.2s, v3.2s
+; CHECK-GI-NEXT: mov s2, v0.s[1]
+; CHECK-GI-NEXT: mov s3, v1.s[1]
+; CHECK-GI-NEXT: fmul s0, s0, s2
+; CHECK-GI-NEXT: fmul s1, s1, s3
+; CHECK-GI-NEXT: fmul s0, s0, s1
+; CHECK-GI-NEXT: ret
%r1 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a)
%r2 = call fast float @llvm.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %b)
%r = fmul fast float %r1, %r2
@@ -405,13 +617,22 @@ define float @fmul_reduct_reassoc_v4v8f32(<4 x float> %a, <8 x float> %b) {
}
define double @fmul_reduct_reassoc_v4f64(<4 x double> %a, <4 x double> %b) {
-; CHECK-LABEL: fmul_reduct_reassoc_v4f64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fmul v2.2d, v2.2d, v3.2d
-; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
-; CHECK-NEXT: fmul v0.2d, v0.2d, v2.2d
-; CHECK-NEXT: fmul d0, d0, v0.d[1]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fmul_reduct_reassoc_v4f64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fmul v2.2d, v2.2d, v3.2d
+; CHECK-SD-NEXT: fmul v0.2d, v0.2d, v1.2d
+; CHECK-SD-NEXT: fmul v0.2d, v0.2d, v2.2d
+; CHECK-SD-NEXT: fmul d0, d0, v0.d[1]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fmul_reduct_reassoc_v4f64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fmul v0.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT: fmul v1.2d, v2.2d, v3.2d
+; CHECK-GI-NEXT: fmul d0, d0, v0.d[1]
+; CHECK-GI-NEXT: fmul d1, d1, v1.d[1]
+; CHECK-GI-NEXT: fmul d0, d0, d1
+; CHECK-GI-NEXT: ret
%r1 = call fast double @llvm.vector.reduce.fmul.f64.v4f64(double 1.0, <4 x double> %a)
%r2 = call fast double @llvm.vector.reduce.fmul.f64.v4f64(double 1.0, <4 x double> %b)
%r = fmul fast double %r1, %r2
@@ -419,17 +640,31 @@ define double @fmul_reduct_reassoc_v4f64(<4 x double> %a, <4 x double> %b) {
}
define float @fmul_reduct_reassoc_v4f32_extrause(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: fmul_reduct_reassoc_v4f32_extrause:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT: fmul v0.2s, v0.2s, v2.2s
-; CHECK-NEXT: fmul v1.2s, v1.2s, v3.2s
-; CHECK-NEXT: fmul s0, s0, v0.s[1]
-; CHECK-NEXT: fmul s1, s1, v1.s[1]
-; CHECK-NEXT: fmul s1, s0, s1
-; CHECK-NEXT: fmul s0, s1, s0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fmul_reduct_reassoc_v4f32_extrause:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ext v2.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: ext v3.16b, v1.16b, v1.16b, #8
+; CHECK-SD-NEXT: fmul v0.2s, v0.2s, v2.2s
+; CHECK-SD-NEXT: fmul v1.2s, v1.2s, v3.2s
+; CHECK-SD-NEXT: fmul s0, s0, v0.s[1]
+; CHECK-SD-NEXT: fmul s1, s1, v1.s[1]
+; CHECK-SD-NEXT: fmul s1, s0, s1
+; CHECK-SD-NEXT: fmul s0, s1, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fmul_reduct_reassoc_v4f32_extrause:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: mov d3, v1.d[1]
+; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v2.2s
+; CHECK-GI-NEXT: fmul v1.2s, v1.2s, v3.2s
+; CHECK-GI-NEXT: mov s2, v0.s[1]
+; CHECK-GI-NEXT: mov s3, v1.s[1]
+; CHECK-GI-NEXT: fmul s0, s0, s2
+; CHECK-GI-NEXT: fmul s1, s1, s3
+; CHECK-GI-NEXT: fmul s1, s0, s1
+; CHECK-GI-NEXT: fmul s0, s1, s0
+; CHECK-GI-NEXT: ret
%r1 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a)
%r2 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %b)
%r = fmul fast float %r1, %r2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll
index 0df80d6..9580326 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll
@@ -7,7 +7,7 @@
define amdgpu_kernel void @stack_write_fi() {
; CHECK-LABEL: stack_write_fi:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: s_add_u32 s0, s0, s17
+; CHECK-NEXT: s_add_u32 s0, s0, s15
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: s_mov_b32 s5, 0
; CHECK-NEXT: s_mov_b32 s4, 0
@@ -23,3 +23,6 @@ entry:
store volatile i64 0, ptr addrspace(5) %alloca, align 4
ret void
}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll
index c4e383c..44c4910 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll
@@ -7,48 +7,47 @@ declare void @callee()
define amdgpu_kernel void @call_debug_loc() {
; CHECK-LABEL: name: call_debug_loc
; CHECK: bb.1.entry:
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2, debug-location !6
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1, debug-location !6
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0, debug-location !6
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16, debug-location !6
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15, debug-location !6
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14, debug-location !6
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11, debug-location !6
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7, debug-location !6
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5, debug-location !6
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, debug-location !6
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[COPY8]], debug-location !6
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[COPY7]], debug-location !6
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[COPY6]], debug-location !6
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[COPY5]], debug-location !6
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[COPY4]], debug-location !6
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY3]], debug-location !6
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF debug-location !6
- ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 10, debug-location !6
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], debug-location !6
- ; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY16]], [[COPY1]], implicit $exec, debug-location !6
- ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 20, debug-location !6
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]], debug-location !6
- ; CHECK-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY17]], [[COPY]], implicit $exec, debug-location !6
- ; CHECK-NEXT: [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 [[COPY2]], [[V_LSHLREV_B32_e64_]], [[V_LSHLREV_B32_e64_1]], implicit $exec, debug-location !6
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3, debug-location !6
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]], debug-location !6
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]], debug-location !6
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]], debug-location !6
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY9]], debug-location !6
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]], debug-location !6
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]], debug-location !6
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]], debug-location !6
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]], debug-location !6
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]], debug-location !6
- ; CHECK-NEXT: $vgpr31 = COPY [[V_OR3_B32_e64_]], debug-location !6
- ; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee, target-flags(amdgpu-gotprel32-hi) @callee, implicit-def $scc, debug-location !6
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0, debug-location !6 :: (dereferenceable invariant load (p0) from got, addrspace 4)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @callee, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, debug-location !6
- ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, debug-location !6
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2, debug-location !7
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1, debug-location !7
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0, debug-location !7
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14, debug-location !7
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13, debug-location !7
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12, debug-location !7
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9, debug-location !7
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5, debug-location !7
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, debug-location !7
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[COPY7]], debug-location !7
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF debug-location !7
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[COPY6]], debug-location !7
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[COPY5]], debug-location !7
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[COPY4]], debug-location !7
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[COPY3]], debug-location !7
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF debug-location !7
+ ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 10, debug-location !7
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], debug-location !7
+ ; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY14]], [[COPY1]], implicit $exec, debug-location !7
+ ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 20, debug-location !7
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]], debug-location !7
+ ; CHECK-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY15]], [[COPY]], implicit $exec, debug-location !7
+ ; CHECK-NEXT: [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 [[COPY2]], [[V_LSHLREV_B32_e64_]], [[V_LSHLREV_B32_e64_1]], implicit $exec, debug-location !7
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3, debug-location !7
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]], debug-location !7
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]], debug-location !7
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]], debug-location !7
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY8]], debug-location !7
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]], debug-location !7
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY11]], debug-location !7
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY12]], debug-location !7
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY13]], debug-location !7
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]], debug-location !7
+ ; CHECK-NEXT: $vgpr31 = COPY [[V_OR3_B32_e64_]], debug-location !7
+ ; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee, target-flags(amdgpu-gotprel32-hi) @callee, implicit-def $scc, debug-location !7
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0, debug-location !7 :: (dereferenceable invariant load (p0) from got, addrspace 4)
+ ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @callee, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, debug-location !7
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, debug-location !7
; CHECK-NEXT: S_ENDPGM 0
entry:
call void @callee(), !dbg !6
@@ -60,11 +59,11 @@ define void @returnaddress_debug_loc(ptr addrspace(1) %ptr) {
; CHECK: bb.1.entry:
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31, debug-location !6
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31, debug-location !7
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]], debug-location !6
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]], debug-location !7
; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE]], [[COPY3]], 0, 0, implicit $exec :: (store (p0) into %ir.ptr, addrspace 1)
; CHECK-NEXT: SI_RETURN
entry:
@@ -78,7 +77,7 @@ declare ptr @llvm.returnaddress(i32 immarg) #0
attributes #0 = { nofree nosync nounwind readnone willreturn }
!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!2, !3, !4, !5}
+!llvm.module.flags = !{!2, !3, !4, !5, !10}
!0 = distinct !DICompileUnit(language: DW_LANG_OpenCL, file: !1, producer: "clang version 14.0.0 (git@github.com:llvm/llvm-project.git 4132dc917eddb446405cc5afef41167b8bce360b)", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, splitDebugInlining: false, nameTableKind: None)
!1 = !DIFile(filename: "gisel_1_gfx1031.cl", directory: "/home/matt/builds/conformance/2.0")
@@ -90,3 +89,4 @@ attributes #0 = { nofree nosync nounwind readnone willreturn }
!7 = distinct !DISubprogram(name: "call_debug_loc", scope: !1, file: !1, line: 8, type: !8, scopeLine: 9, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9)
!8 = !DISubroutineType(types: !9)
!9 = !{}
+!10 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
index a2f1308..83c331c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
@@ -32,13 +32,13 @@ define void @call_result_align_1() {
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -81,13 +81,13 @@ define void @call_result_align_8() {
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -131,13 +131,13 @@ define void @declaration_result_align_8() {
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr_align8
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -181,11 +181,11 @@ define ptr addrspace(1) @tail_call_assert_align() {
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @returns_ptr_align8
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -209,3 +209,6 @@ entry:
%call = tail call ptr addrspace(1) @returns_ptr_align8()
ret ptr addrspace(1) %call
}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll
index 9359bde..8069698 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll
@@ -30,8 +30,8 @@ define float @test_atomicrmw_fsub(ptr addrspace(3) %addr) {
; CHECK-NEXT: bb.2.atomicrmw.start:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %16(s64), %bb.2, [[C1]](s64), %bb.1
- ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %14(s32), %bb.2
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %15(s64), %bb.2, [[C1]](s64), %bb.1
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %13(s32), %bb.2
; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[PHI1]], [[C]]
; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[PHI1]], [[FSUB]] :: (load store seq_cst seq_cst (s32) on %ir.addr, addrspace 3)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), [[PHI]](s64)
@@ -48,3 +48,6 @@ define float @test_atomicrmw_fsub(ptr addrspace(3) %addr) {
%oldval = atomicrmw fsub ptr addrspace(3) %addr, float 1.0 seq_cst
ret float %oldval
}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
index 4cf02c93..ca889de 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
@@ -9,37 +9,36 @@ declare hidden void @extern()
define amdgpu_kernel void @kernel_call_no_workitem_ids() {
; CHECK-LABEL: name: kernel_call_no_workitem_ids
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64)
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY8]](p4)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY7]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s64) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY6]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY11]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY12]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY13]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY8]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY9]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY10]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY11]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
@@ -50,40 +49,39 @@ define amdgpu_kernel void @kernel_call_no_workitem_ids() {
define amdgpu_kernel void @kernel_call_no_workgroup_ids() {
; CHECK-LABEL: name: kernel_call_no_workgroup_ids
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64)
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY7]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s64) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY12]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY11]], [[SHL]]
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY9]], [[SHL]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY8]](p4)
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY6]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY8]](s64)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -95,12 +93,12 @@ define amdgpu_kernel void @kernel_call_no_workgroup_ids() {
define amdgpu_kernel void @kernel_call_no_other_sgprs() {
; CHECK-LABEL: name: kernel_call_no_other_sgprs
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr8_sgpr9
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr6_sgpr7
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p4) = COPY [[COPY3]](p4)
@@ -139,12 +137,12 @@ define void @func_call_no_workitem_ids() {
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
@@ -177,12 +175,12 @@ define void @func_call_no_workgroup_ids() {
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY4]](p4)
; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY3]]
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY2]]
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]]
@@ -226,3 +224,6 @@ define void @func_call_no_other_sgprs() {
call void @extern() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
ret void
}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
index 711e1b7..213598b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
@@ -11,52 +11,51 @@ declare hidden void @external_void_func_v32i32(<32 x i32>) #0
define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 {
; GFX900-LABEL: name: test_call_external_void_func_i32
; GFX900: bb.1 (%ir-block.1):
- ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GFX900-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX900-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
- ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
- ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
- ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX900-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
- ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GFX900-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+ ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+ ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
; GFX900-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; GFX900-NEXT: $vgpr0 = COPY [[C]](s32)
- ; GFX900-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GFX900-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GFX900-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GFX900-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GFX900-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -64,52 +63,51 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 {
;
; GFX908-LABEL: name: test_call_external_void_func_i32
; GFX908: bb.1 (%ir-block.1):
- ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX908-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
- ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
- ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
- ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX908-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
- ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GFX908-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+ ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+ ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
; GFX908-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; GFX908-NEXT: $vgpr0 = COPY [[C]](s32)
- ; GFX908-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GFX908-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GFX908-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GFX908-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GFX908-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -130,13 +128,13 @@ define void @test_func_call_external_void_func_i32() #0 {
; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99
; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -171,13 +169,13 @@ define void @test_func_call_external_void_func_i32() #0 {
; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99
; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -209,41 +207,40 @@ define void @test_func_call_external_void_func_i32() #0 {
define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX900-LABEL: name: test_call_external_void_func_v32i32
; GFX900: bb.1 (%ir-block.1):
- ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GFX900-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX900-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
; GFX900-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
- ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
- ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
- ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX900-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
- ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GFX900-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+ ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+ ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
; GFX900-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; GFX900-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
; GFX900-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
@@ -281,16 +278,16 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX900-NEXT: $vgpr28 = COPY [[UV28]](s32)
; GFX900-NEXT: $vgpr29 = COPY [[UV29]](s32)
; GFX900-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; GFX900-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GFX900-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GFX900-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GFX900-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GFX900-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
@@ -298,41 +295,40 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
;
; GFX908-LABEL: name: test_call_external_void_func_v32i32
; GFX908: bb.1 (%ir-block.1):
- ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX908-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
; GFX908-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
- ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
- ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
- ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX908-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
- ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GFX908-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+ ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+ ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
; GFX908-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; GFX908-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
; GFX908-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
@@ -370,16 +366,16 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX908-NEXT: $vgpr28 = COPY [[UV28]](s32)
; GFX908-NEXT: $vgpr29 = COPY [[UV29]](s32)
; GFX908-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; GFX908-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GFX908-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GFX908-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GFX908-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GFX908-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
@@ -400,7 +396,7 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX900-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
@@ -458,7 +454,7 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
; GFX900-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GFX900-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GFX900-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GFX900-NEXT: [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GFX900-NEXT: [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; GFX900-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -528,7 +524,7 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX908-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
@@ -586,7 +582,7 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
; GFX908-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GFX908-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GFX908-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GFX908-NEXT: [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GFX908-NEXT: [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; GFX908-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -651,84 +647,82 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0 {
; GFX900-LABEL: name: test_only_workitem_id_x
; GFX900: bb.1 (%ir-block.0):
- ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GFX900-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GFX900-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
- ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+ ; GFX900-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
- ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY8]], [[C1]](s64)
+ ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+ ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GFX900-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX900-NEXT: $vgpr0 = COPY [[C]](s32)
- ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
- ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
- ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
+ ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>)
+ ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4)
+ ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32)
- ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32)
- ; GFX900-NEXT: $vgpr31 = COPY [[COPY15]](s32)
+ ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY9]](s64)
+ ; GFX900-NEXT: $sgpr12 = COPY [[COPY10]](s32)
+ ; GFX900-NEXT: $sgpr13 = COPY [[COPY11]](s32)
+ ; GFX900-NEXT: $sgpr14 = COPY [[COPY12]](s32)
+ ; GFX900-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GFX900-NEXT: $vgpr31 = COPY [[COPY13]](s32)
; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX900-NEXT: S_ENDPGM 0
;
; GFX908-LABEL: name: test_only_workitem_id_x
; GFX908: bb.1 (%ir-block.0):
- ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GFX908-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GFX908-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
- ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+ ; GFX908-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
- ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY8]], [[C1]](s64)
+ ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+ ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GFX908-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX908-NEXT: $vgpr0 = COPY [[C]](s32)
- ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
- ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
- ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
+ ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>)
+ ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4)
+ ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32)
- ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32)
- ; GFX908-NEXT: $vgpr31 = COPY [[COPY15]](s32)
+ ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY9]](s64)
+ ; GFX908-NEXT: $sgpr12 = COPY [[COPY10]](s32)
+ ; GFX908-NEXT: $sgpr13 = COPY [[COPY11]](s32)
+ ; GFX908-NEXT: $sgpr14 = COPY [[COPY12]](s32)
+ ; GFX908-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GFX908-NEXT: $vgpr31 = COPY [[COPY13]](s32)
; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX908-NEXT: S_ENDPGM 0
@@ -739,45 +733,44 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0
define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1 {
; GFX900-LABEL: name: test_only_workitem_id_y
; GFX900: bb.1 (%ir-block.0):
- ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
- ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GFX900-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GFX900-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
- ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+ ; GFX900-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
- ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY8]], [[C1]](s64)
+ ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+ ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GFX900-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C3]](s32)
+ ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C3]](s32)
; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]]
; GFX900-NEXT: $vgpr0 = COPY [[C]](s32)
- ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
- ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
- ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
+ ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>)
+ ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4)
+ ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32)
- ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY9]](s64)
+ ; GFX900-NEXT: $sgpr12 = COPY [[COPY10]](s32)
+ ; GFX900-NEXT: $sgpr13 = COPY [[COPY11]](s32)
+ ; GFX900-NEXT: $sgpr14 = COPY [[COPY12]](s32)
+ ; GFX900-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32)
; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -785,45 +778,44 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1
;
; GFX908-LABEL: name: test_only_workitem_id_y
; GFX908: bb.1 (%ir-block.0):
- ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
- ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GFX908-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GFX908-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
- ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+ ; GFX908-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
- ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY8]], [[C1]](s64)
+ ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+ ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GFX908-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C3]](s32)
+ ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C3]](s32)
; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]]
; GFX908-NEXT: $vgpr0 = COPY [[C]](s32)
- ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
- ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
- ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
+ ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>)
+ ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4)
+ ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32)
- ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY9]](s64)
+ ; GFX908-NEXT: $sgpr12 = COPY [[COPY10]](s32)
+ ; GFX908-NEXT: $sgpr13 = COPY [[COPY11]](s32)
+ ; GFX908-NEXT: $sgpr14 = COPY [[COPY12]](s32)
+ ; GFX908-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32)
; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -835,45 +827,44 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1
define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2 {
; GFX900-LABEL: name: test_only_workitem_id_z
; GFX900: bb.1 (%ir-block.0):
- ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
- ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GFX900-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GFX900-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
- ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+ ; GFX900-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
- ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY8]], [[C1]](s64)
+ ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+ ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GFX900-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C3]](s32)
+ ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C3]](s32)
; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]]
; GFX900-NEXT: $vgpr0 = COPY [[C]](s32)
- ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
- ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
- ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
+ ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>)
+ ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4)
+ ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32)
- ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY9]](s64)
+ ; GFX900-NEXT: $sgpr12 = COPY [[COPY10]](s32)
+ ; GFX900-NEXT: $sgpr13 = COPY [[COPY11]](s32)
+ ; GFX900-NEXT: $sgpr14 = COPY [[COPY12]](s32)
+ ; GFX900-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32)
; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -881,45 +872,44 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2
;
; GFX908-LABEL: name: test_only_workitem_id_z
; GFX908: bb.1 (%ir-block.0):
- ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
- ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GFX908-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GFX908-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
- ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+ ; GFX908-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
- ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY8]], [[C1]](s64)
+ ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+ ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GFX908-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C3]](s32)
+ ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C3]](s32)
; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]]
; GFX908-NEXT: $vgpr0 = COPY [[C]](s32)
- ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
- ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
- ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
+ ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>)
+ ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4)
+ ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32)
- ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY9]](s64)
+ ; GFX908-NEXT: $sgpr12 = COPY [[COPY10]](s32)
+ ; GFX908-NEXT: $sgpr13 = COPY [[COPY11]](s32)
+ ; GFX908-NEXT: $sgpr14 = COPY [[COPY12]](s32)
+ ; GFX908-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32)
; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -931,46 +921,45 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2
define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size !3 {
; GFX900-LABEL: name: test_only_workitem_id_xy
; GFX900: bb.1 (%ir-block.0):
- ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GFX900-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
- ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+ ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
- ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
- ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C1]](s64)
+ ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX900-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
- ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]]
+ ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C2]](s32)
+ ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY14]], [[SHL]]
; GFX900-NEXT: $vgpr0 = COPY [[C]](s32)
- ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
+ ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
+ ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
- ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32)
- ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32)
- ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32)
- ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64)
+ ; GFX900-NEXT: $sgpr12 = COPY [[COPY11]](s32)
+ ; GFX900-NEXT: $sgpr13 = COPY [[COPY12]](s32)
+ ; GFX900-NEXT: $sgpr14 = COPY [[COPY13]](s32)
+ ; GFX900-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32)
; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -978,46 +967,45 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size !
;
; GFX908-LABEL: name: test_only_workitem_id_xy
; GFX908: bb.1 (%ir-block.0):
- ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GFX908-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
- ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+ ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
- ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
- ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C1]](s64)
+ ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX908-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
- ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]]
+ ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C2]](s32)
+ ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY14]], [[SHL]]
; GFX908-NEXT: $vgpr0 = COPY [[C]](s32)
- ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
+ ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
+ ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
- ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32)
- ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32)
- ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32)
- ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64)
+ ; GFX908-NEXT: $sgpr12 = COPY [[COPY11]](s32)
+ ; GFX908-NEXT: $sgpr13 = COPY [[COPY12]](s32)
+ ; GFX908-NEXT: $sgpr14 = COPY [[COPY13]](s32)
+ ; GFX908-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32)
; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1029,50 +1017,49 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size !
define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size !4 {
; GFX900-LABEL: name: test_only_workitem_id_yz
; GFX900: bb.1 (%ir-block.0):
- ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
- ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GFX900-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
- ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+ ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
- ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
- ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C1]](s64)
+ ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX900-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32)
+ ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY14]], [[C3]](s32)
; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]]
- ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX900-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32)
+ ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C4]](s32)
; GFX900-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; GFX900-NEXT: $vgpr0 = COPY [[C]](s32)
- ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
+ ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
+ ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
- ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32)
- ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32)
- ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32)
- ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64)
+ ; GFX900-NEXT: $sgpr12 = COPY [[COPY11]](s32)
+ ; GFX900-NEXT: $sgpr13 = COPY [[COPY12]](s32)
+ ; GFX900-NEXT: $sgpr14 = COPY [[COPY13]](s32)
+ ; GFX900-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1080,50 +1067,49 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size !
;
; GFX908-LABEL: name: test_only_workitem_id_yz
; GFX908: bb.1 (%ir-block.0):
- ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
- ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GFX908-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
- ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+ ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
- ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
- ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C1]](s64)
+ ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX908-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32)
+ ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY14]], [[C3]](s32)
; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]]
- ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX908-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32)
+ ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C4]](s32)
; GFX908-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; GFX908-NEXT: $vgpr0 = COPY [[C]](s32)
- ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
+ ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
+ ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
- ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32)
- ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32)
- ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32)
- ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64)
+ ; GFX908-NEXT: $sgpr12 = COPY [[COPY11]](s32)
+ ; GFX908-NEXT: $sgpr13 = COPY [[COPY12]](s32)
+ ; GFX908-NEXT: $sgpr14 = COPY [[COPY13]](s32)
+ ; GFX908-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1135,46 +1121,45 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size !
define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size !5 {
; GFX900-LABEL: name: test_only_workitem_id_xz
; GFX900: bb.1 (%ir-block.0):
- ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GFX900-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
- ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+ ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
- ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
- ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C1]](s64)
+ ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX900-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
- ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]]
+ ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C2]](s32)
+ ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY14]], [[SHL]]
; GFX900-NEXT: $vgpr0 = COPY [[C]](s32)
- ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
+ ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
+ ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
- ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32)
- ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32)
- ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32)
- ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64)
+ ; GFX900-NEXT: $sgpr12 = COPY [[COPY11]](s32)
+ ; GFX900-NEXT: $sgpr13 = COPY [[COPY12]](s32)
+ ; GFX900-NEXT: $sgpr14 = COPY [[COPY13]](s32)
+ ; GFX900-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32)
; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1182,46 +1167,45 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size !
;
; GFX908-LABEL: name: test_only_workitem_id_xz
; GFX908: bb.1 (%ir-block.0):
- ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GFX908-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
- ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+ ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
- ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
- ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C1]](s64)
+ ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX908-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
- ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]]
+ ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C2]](s32)
+ ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY14]], [[SHL]]
; GFX908-NEXT: $vgpr0 = COPY [[C]](s32)
- ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
+ ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
+ ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
- ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32)
- ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32)
- ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32)
- ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64)
+ ; GFX908-NEXT: $sgpr12 = COPY [[COPY11]](s32)
+ ; GFX908-NEXT: $sgpr13 = COPY [[COPY12]](s32)
+ ; GFX908-NEXT: $sgpr14 = COPY [[COPY13]](s32)
+ ; GFX908-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32)
; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1237,9 +1221,11 @@ declare i32 @llvm.amdgcn.workitem.id.z() #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone speculatable willreturn }
+!llvm.module.flags = !{!6}
!0 = !{i32 64, i32 1, i32 1}
!1 = !{i32 1, i32 64, i32 1}
!2 = !{i32 1, i32 1, i32 64}
!3 = !{i32 32, i32 2, i32 1}
!4 = !{i32 1, i32 32, i32 2}
!5 = !{i32 32, i32 1, i32 2}
+!6 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
index 609883c..8b0a006 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -70,58 +70,57 @@ declare hidden i32 @external_gfx_i32_func_i32(i32) #0
define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 {
; GCN-LABEL: name: test_call_external_i32_func_i32_imm
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.out.kernarg.offset1, align 16, addrspace 4)
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_func_i32
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; GCN-NEXT: $vgpr0 = COPY [[C]](s32)
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i32_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN-NEXT: G_STORE [[COPY21]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.load, addrspace 1)
+ ; GCN-NEXT: G_STORE [[COPY19]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.load, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
%val = call i32 @external_i32_func_i32(i32 42)
store volatile i32 %val, ptr addrspace(1) %out
@@ -155,54 +154,53 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(ptr addrspace(1)
define amdgpu_kernel void @test_call_external_i1_func_void() #0 {
; GCN-LABEL: name: test_call_external_i1_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i1_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY21]](s32)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY19]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -233,54 +231,53 @@ define amdgpu_gfx void @test_gfx_call_external_i1_func_void() #0 {
define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 {
; GCN-LABEL: name: test_call_external_i1_zeroext_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i1_zeroext_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i1_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY21]], 1
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY19]], 1
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1)
@@ -295,54 +292,53 @@ define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 {
define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 {
; GCN-LABEL: name: test_call_external_i1_signext_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i1_signext_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i1_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY21]], 1
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY19]], 1
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1)
@@ -357,54 +353,53 @@ define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 {
define amdgpu_kernel void @test_call_external_i8_func_void() #0 {
; GCN-LABEL: name: test_call_external_i8_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i8_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i8_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1)
@@ -437,54 +432,53 @@ define amdgpu_gfx void @test_gfx_call_external_i8_func_void() #0 {
define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 {
; GCN-LABEL: name: test_call_external_i8_zeroext_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i8_zeroext_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i8_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY21]], 8
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY19]], 8
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8)
@@ -499,54 +493,53 @@ define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 {
define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 {
; GCN-LABEL: name: test_call_external_i8_signext_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i8_signext_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i8_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY21]], 8
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY19]], 8
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8)
@@ -561,54 +554,53 @@ define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 {
define amdgpu_kernel void @test_call_external_i16_func_void() #0 {
; GCN-LABEL: name: test_call_external_i16_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i16_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -620,54 +612,53 @@ define amdgpu_kernel void @test_call_external_i16_func_void() #0 {
define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 {
; GCN-LABEL: name: test_call_external_i16_zeroext_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i16_zeroext_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i16_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY21]], 16
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY19]], 16
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16)
@@ -682,54 +673,53 @@ define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 {
define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 {
; GCN-LABEL: name: test_call_external_i16_signext_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i16_signext_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i16_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY21]], 16
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY19]], 16
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
@@ -744,55 +734,54 @@ define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 {
define amdgpu_kernel void @test_call_external_i32_func_void() #0 {
; GCN-LABEL: name: test_call_external_i32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; GCN-NEXT: G_STORE [[COPY19]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
%val = call i32 @external_i32_func_void()
store volatile i32 %val, ptr addrspace(1) undef
@@ -820,55 +809,54 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 {
define amdgpu_kernel void @test_call_external_i48_func_void() #0 {
; GCN-LABEL: name: test_call_external_i48_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i48_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i48_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (volatile store (s48) into `ptr addrspace(1) undef`, align 8, addrspace 1)
@@ -881,55 +869,54 @@ define amdgpu_kernel void @test_call_external_i48_func_void() #0 {
define amdgpu_kernel void @test_call_external_i48_zeroext_func_void() #0 {
; GCN-LABEL: name: test_call_external_i48_zeroext_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i48_zeroext_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i48_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48)
@@ -944,55 +931,54 @@ define amdgpu_kernel void @test_call_external_i48_zeroext_func_void() #0 {
define amdgpu_kernel void @test_call_external_i48_signext_func_void() #0 {
; GCN-LABEL: name: test_call_external_i48_signext_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i48_signext_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i48_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s48)
@@ -1007,55 +993,54 @@ define amdgpu_kernel void @test_call_external_i48_signext_func_void() #0 {
define amdgpu_kernel void @test_call_external_i64_func_void() #0 {
; GCN-LABEL: name: test_call_external_i64_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i64_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -1067,55 +1052,54 @@ define amdgpu_kernel void @test_call_external_i64_func_void() #0 {
define amdgpu_kernel void @test_call_external_p1_func_void() #0 {
; GCN-LABEL: name: test_call_external_p1_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_p1_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_p1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[MV]](p1), [[DEF]](p1) :: (volatile store (p1) into `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -1127,58 +1111,57 @@ define amdgpu_kernel void @test_call_external_p1_func_void() #0 {
define amdgpu_kernel void @test_call_external_v2p1_func_void() #0 {
; GCN-LABEL: name: test_call_external_v2p1_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2p1_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v2p1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
- ; GCN-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
+ ; GCN-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (volatile store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1)
@@ -1191,55 +1174,54 @@ define amdgpu_kernel void @test_call_external_v2p1_func_void() #0 {
define amdgpu_kernel void @test_call_external_p3_func_void() #0 {
; GCN-LABEL: name: test_call_external_p3_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_p3_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_p3_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(p3) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(p3) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN-NEXT: G_STORE [[COPY21]](p3), [[DEF]](p3) :: (volatile store (p3) into `ptr addrspace(3) undef`, addrspace 3)
+ ; GCN-NEXT: G_STORE [[COPY19]](p3), [[DEF]](p3) :: (volatile store (p3) into `ptr addrspace(3) undef`, addrspace 3)
; GCN-NEXT: S_ENDPGM 0
%val = call ptr addrspace(3) @external_p3_func_void()
store volatile ptr addrspace(3) %val, ptr addrspace(3) undef
@@ -1249,55 +1231,54 @@ define amdgpu_kernel void @test_call_external_p3_func_void() #0 {
define amdgpu_kernel void @test_call_external_v2p3_func_void() #0 {
; GCN-LABEL: name: test_call_external_v2p3_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2p3_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v2p3_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(p3) = COPY $vgpr1
- ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY21]](p3), [[COPY22]](p3)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(p3) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(p3) = COPY $vgpr1
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY19]](p3), [[COPY20]](p3)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p3) :: (volatile store (<2 x p3>) into `ptr addrspace(3) undef`, addrspace 3)
; GCN-NEXT: S_ENDPGM 0
@@ -1309,54 +1290,53 @@ define amdgpu_kernel void @test_call_external_v2p3_func_void() #0 {
define amdgpu_kernel void @test_call_external_f16_func_void() #0 {
; GCN-LABEL: name: test_call_external_f16_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_f16_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -1368,55 +1348,54 @@ define amdgpu_kernel void @test_call_external_f16_func_void() #0 {
define amdgpu_kernel void @test_call_external_f32_func_void() #0 {
; GCN-LABEL: name: test_call_external_f32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_f32_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; GCN-NEXT: G_STORE [[COPY19]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
%val = call float @external_f32_func_void()
store volatile float %val, ptr addrspace(1) undef
@@ -1426,55 +1405,54 @@ define amdgpu_kernel void @test_call_external_f32_func_void() #0 {
define amdgpu_kernel void @test_call_external_f64_func_void() #0 {
; GCN-LABEL: name: test_call_external_f64_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_f64_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_f64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -1486,58 +1464,57 @@ define amdgpu_kernel void @test_call_external_f64_func_void() #0 {
define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 {
; GCN-LABEL: name: test_call_external_v2f64_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2f64_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v2f64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
- ; GCN-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
+ ; GCN-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (volatile store (<2 x s64>) into `ptr addrspace(1) undef`, addrspace 1)
@@ -1550,55 +1527,54 @@ define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 {
define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 {
; GCN-LABEL: name: test_call_external_v2i32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2i32_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v2i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (volatile store (<2 x s32>) into `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -1610,56 +1586,55 @@ define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 {
define amdgpu_kernel void @test_call_external_v3i32_func_void() #0 {
; GCN-LABEL: name: test_call_external_v3i32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v3i32_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v3i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -1671,57 +1646,56 @@ define amdgpu_kernel void @test_call_external_v3i32_func_void() #0 {
define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 {
; GCN-LABEL: name: test_call_external_v4i32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v4i32_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v4i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (volatile store (<4 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -1733,58 +1707,57 @@ define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 {
define amdgpu_kernel void @test_call_external_v5i32_func_void() #0 {
; GCN-LABEL: name: test_call_external_v5i32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v5i32_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v5i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -1796,61 +1769,60 @@ define amdgpu_kernel void @test_call_external_v5i32_func_void() #0 {
define amdgpu_kernel void @test_call_external_v8i32_func_void() #0 {
; GCN-LABEL: name: test_call_external_v8i32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v8i32_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v8i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (volatile store (<8 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -1862,69 +1834,68 @@ define amdgpu_kernel void @test_call_external_v8i32_func_void() #0 {
define amdgpu_kernel void @test_call_external_v16i32_func_void() #0 {
; GCN-LABEL: name: test_call_external_v16i32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v16i32_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v16i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (volatile store (<16 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -1936,85 +1907,84 @@ define amdgpu_kernel void @test_call_external_v16i32_func_void() #0 {
define amdgpu_kernel void @test_call_external_v32i32_func_void() #0 {
; GCN-LABEL: name: test_call_external_v32i32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v32i32_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v32i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15, implicit-def $vgpr16, implicit-def $vgpr17, implicit-def $vgpr18, implicit-def $vgpr19, implicit-def $vgpr20, implicit-def $vgpr21, implicit-def $vgpr22, implicit-def $vgpr23, implicit-def $vgpr24, implicit-def $vgpr25, implicit-def $vgpr26, implicit-def $vgpr27, implicit-def $vgpr28, implicit-def $vgpr29, implicit-def $vgpr30, implicit-def $vgpr31
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GCN-NEXT: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; GCN-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(s32) = COPY $vgpr18
- ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(s32) = COPY $vgpr19
- ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(s32) = COPY $vgpr20
- ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(s32) = COPY $vgpr21
- ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s32) = COPY $vgpr22
- ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY $vgpr23
- ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY $vgpr24
- ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY $vgpr25
- ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY $vgpr26
- ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(s32) = COPY $vgpr27
- ; GCN-NEXT: [[COPY49:%[0-9]+]]:_(s32) = COPY $vgpr28
- ; GCN-NEXT: [[COPY50:%[0-9]+]]:_(s32) = COPY $vgpr29
- ; GCN-NEXT: [[COPY51:%[0-9]+]]:_(s32) = COPY $vgpr30
- ; GCN-NEXT: [[COPY52:%[0-9]+]]:_(s32) = COPY $vgpr31
- ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32), [[COPY37]](s32), [[COPY38]](s32), [[COPY39]](s32), [[COPY40]](s32), [[COPY41]](s32), [[COPY42]](s32), [[COPY43]](s32), [[COPY44]](s32), [[COPY45]](s32), [[COPY46]](s32), [[COPY47]](s32), [[COPY48]](s32), [[COPY49]](s32), [[COPY50]](s32), [[COPY51]](s32), [[COPY52]](s32)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GCN-NEXT: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; GCN-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s32) = COPY $vgpr24
+ ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY $vgpr25
+ ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY $vgpr26
+ ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY $vgpr27
+ ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY $vgpr28
+ ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(s32) = COPY $vgpr29
+ ; GCN-NEXT: [[COPY49:%[0-9]+]]:_(s32) = COPY $vgpr30
+ ; GCN-NEXT: [[COPY50:%[0-9]+]]:_(s32) = COPY $vgpr31
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32), [[COPY37]](s32), [[COPY38]](s32), [[COPY39]](s32), [[COPY40]](s32), [[COPY41]](s32), [[COPY42]](s32), [[COPY43]](s32), [[COPY44]](s32), [[COPY45]](s32), [[COPY46]](s32), [[COPY47]](s32), [[COPY48]](s32), [[COPY49]](s32), [[COPY50]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -2026,55 +1996,54 @@ define amdgpu_kernel void @test_call_external_v32i32_func_void() #0 {
define amdgpu_kernel void @test_call_external_v2i16_func_void() #0 {
; GCN-LABEL: name: test_call_external_v2i16_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2i16_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v2i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN-NEXT: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1)
+ ; GCN-NEXT: G_STORE [[COPY19]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
%val = call <2 x i16> @external_v2i16_func_void()
store volatile <2 x i16> %val, ptr addrspace(1) undef
@@ -2084,55 +2053,54 @@ define amdgpu_kernel void @test_call_external_v2i16_func_void() #0 {
define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 {
; GCN-LABEL: name: test_call_external_v3i16_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v3i16_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v3i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>)
; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2146,55 +2114,54 @@ define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 {
define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 {
; GCN-LABEL: name: test_call_external_v4i16_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v4i16_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v4i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -2206,55 +2173,54 @@ define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 {
define amdgpu_kernel void @test_call_external_v2f16_func_void() #0 {
; GCN-LABEL: name: test_call_external_v2f16_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2f16_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v2f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN-NEXT: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1)
+ ; GCN-NEXT: G_STORE [[COPY19]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
%val = call <2 x half> @external_v2f16_func_void()
store volatile <2 x half> %val, ptr addrspace(1) undef
@@ -2264,55 +2230,54 @@ define amdgpu_kernel void @test_call_external_v2f16_func_void() #0 {
define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 {
; GCN-LABEL: name: test_call_external_v3f16_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v3f16_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v3f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>)
; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2326,55 +2291,54 @@ define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 {
define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 {
; GCN-LABEL: name: test_call_external_v4f16_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v4f16_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v4f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -2386,56 +2350,55 @@ define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 {
define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 {
; GCN-LABEL: name: test_call_external_v3f32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v3f32_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v3f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `ptr addrspace(1) undef`, align 16, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -2447,58 +2410,57 @@ define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 {
define amdgpu_kernel void @test_call_external_v5f32_func_void() #0 {
; GCN-LABEL: name: test_call_external_v5f32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v5f32_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v5f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `ptr addrspace(1) undef`, align 32, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -2511,58 +2473,57 @@ define amdgpu_kernel void @test_call_external_v5f32_func_void() #0 {
define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 {
; GCN-LABEL: name: test_call_external_i32_i64_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_i64_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i32_i64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; GCN-NEXT: G_STORE [[COPY19]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
%val = call { i32, i64 } @external_i32_i64_func_void()
@@ -2601,57 +2562,56 @@ define amdgpu_gfx void @test_gfx_call_external_i32_i64_func_void() #0 {
define amdgpu_kernel void @test_call_external_a2i32_func_void() #0 {
; GCN-LABEL: name: test_call_external_a2i32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_a2i32_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_a2i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- ; GCN-NEXT: G_STORE [[COPY22]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; GCN-NEXT: G_STORE [[COPY19]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; GCN-NEXT: G_STORE [[COPY20]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
%val = call [2 x i32] @external_a2i32_func_void()
%val.0 = extractvalue [2 x i32] %val, 0
@@ -2664,66 +2624,65 @@ define amdgpu_kernel void @test_call_external_a2i32_func_void() #0 {
define amdgpu_kernel void @test_call_external_a5i8_func_void() #0 {
; GCN-LABEL: name: test_call_external_a5i8_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_a5i8_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_a5i8_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32)
; GCN-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16)
- ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC4]](s16)
- ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GCN-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32)
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GCN-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32)
; GCN-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC6]](s16)
- ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GCN-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY25]](s32)
+ ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GCN-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32)
; GCN-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC8]](s16)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1)
@@ -2749,52 +2708,51 @@ define amdgpu_kernel void @test_call_external_a5i8_func_void() #0 {
define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 {
; GCN-LABEL: name: test_call_external_v32i32_i32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v32i32_i32_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5)
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v32i32_i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2816,52 +2774,51 @@ define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 {
define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 {
; GCN-LABEL: name: test_call_external_i32_v32i32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_v32i32_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5)
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i32_v32i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2883,52 +2840,51 @@ define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 {
define amdgpu_kernel void @test_call_external_v33i32_func_void() #0 {
; GCN-LABEL: name: test_call_external_v33i32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v33i32_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5)
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v33i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2943,18 +2899,17 @@ define amdgpu_kernel void @test_call_external_v33i32_func_void() #0 {
define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(ptr addrspace(1) %p, i32 %idx) #0 {
; GCN-LABEL: name: test_call_external_v33i32_func_v33i32_i32
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.p.kernarg.offset1, align 16, addrspace 4)
@@ -2964,40 +2919,40 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(ptr addrspa
; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v33i32_func_v33i32_i32
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1)
; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5)
; GCN-NEXT: $vgpr1 = COPY [[UV]](s32)
; GCN-NEXT: $vgpr2 = COPY [[UV1]](s32)
; GCN-NEXT: $vgpr3 = COPY [[LOAD1]](s32)
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v33i32_func_v33i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -3012,3 +2967,6 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(ptr addrspa
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind noinline }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
index 820e41d..ed68d82 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
@@ -6,18 +6,17 @@ declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(p
define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 {
; GCN-LABEL: name: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32
; GCN: bb.1 (%ir-block.1):
- ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -30,24 +29,24 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
; GCN-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.in.gep1, addrspace 5)
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
- ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64)
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C4]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32)
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C5]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; GCN-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
; GCN-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
@@ -55,16 +54,16 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
; GCN-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; GCN-NEXT: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.in.val, align 4, addrspace 5)
; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX1]](p5)
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GCN-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc
@@ -89,3 +88,6 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
store volatile i32 %out.val1, ptr addrspace(1) undef
ret void
}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
index 2ede504..e29d178 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -97,49 +97,48 @@ declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8,
define amdgpu_kernel void @test_call_external_void_func_void() #0 {
; CHECK-LABEL: name: test_call_external_void_func_void
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_void
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -174,12 +173,12 @@ define void @test_func_call_external_void_func_void() #0 {
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_void
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -208,51 +207,50 @@ define void @test_func_call_external_void_func_void() #0 {
define amdgpu_kernel void @test_call_external_void_func_empty_struct() #0 {
; CHECK-LABEL: name: test_call_external_void_func_empty_struct
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_empty_struct
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_empty_struct, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -264,51 +262,50 @@ define amdgpu_kernel void @test_call_external_void_func_empty_struct() #0 {
define amdgpu_kernel void @test_call_external_void_func_empty_array() #0 {
; CHECK-LABEL: name: test_call_external_void_func_empty_array
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_empty_array
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_empty_array, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -320,52 +317,51 @@ define amdgpu_kernel void @test_call_external_void_func_empty_array() #0 {
define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
; CHECK-LABEL: name: test_call_external_void_func_i1_imm
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s1)
; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i1, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -377,54 +373,53 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
; CHECK-LABEL: name: test_call_external_void_func_i1_signext
; CHECK: bb.1 (%ir-block.1):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s1) from `ptr addrspace(1) undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1_signext
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1)
; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i1_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -437,54 +432,53 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
; CHECK-LABEL: name: test_call_external_void_func_i1_zeroext
; CHECK: bb.1 (%ir-block.1):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s1) from `ptr addrspace(1) undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1_zeroext
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1)
; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i1_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -497,54 +491,53 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
; CHECK-LABEL: name: test_call_external_void_func_i8_imm
; CHECK: bb.1 (%ir-block.1):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 123
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[C]](s8)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i8, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -556,55 +549,54 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
; CHECK-LABEL: name: test_call_external_void_func_i8_signext
; CHECK: bb.1 (%ir-block.1):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s8) from `ptr addrspace(1) undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8_signext
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s16) = G_SEXT [[LOAD]](s8)
; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[SEXT]](s16)
; CHECK-NEXT: $vgpr0 = COPY [[SEXT1]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i8_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -617,55 +609,54 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
; CHECK-LABEL: name: test_call_external_void_func_i8_zeroext
; CHECK: bb.1 (%ir-block.1):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s8) from `ptr addrspace(1) undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8_zeroext
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[LOAD]](s8)
; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ZEXT]](s16)
; CHECK-NEXT: $vgpr0 = COPY [[ZEXT1]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i8_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -678,52 +669,51 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
; CHECK-LABEL: name: test_call_external_void_func_i16_imm
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 123
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16)
; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -735,54 +725,53 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
; CHECK-LABEL: name: test_call_external_void_func_i16_signext
; CHECK: bb.1 (%ir-block.1):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16_signext
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16)
; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i16_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -795,54 +784,53 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
; CHECK-LABEL: name: test_call_external_void_func_i16_zeroext
; CHECK: bb.1 (%ir-block.1):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16_zeroext
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16)
; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i16_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -855,52 +843,51 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
; CHECK-LABEL: name: test_call_external_void_func_i32_imm
; CHECK: bb.1 (%ir-block.1):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -950,53 +937,52 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg
define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
; CHECK-LABEL: name: test_call_external_void_func_i64_imm
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 123
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i64
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1008,56 +994,55 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v2i64
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x s64>) from `ptr addrspace(1) null`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i64
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1070,57 +1055,56 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v2i64_imm
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 17179869187
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i64
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C2]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1132,56 +1116,55 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_i48(i32) #0 {
; CHECK-LABEL: name: test_call_external_void_func_i48
; CHECK: bb.1 (%ir-block.1):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i48, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1194,56 +1177,55 @@ define amdgpu_kernel void @test_call_external_void_func_i48(i32) #0 {
define amdgpu_kernel void @test_call_external_void_func_i48_signext(i32) #0 {
; CHECK-LABEL: name: test_call_external_void_func_i48_signext
; CHECK: bb.1 (%ir-block.1):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48_signext
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i48_signext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1256,56 +1238,55 @@ define amdgpu_kernel void @test_call_external_void_func_i48_signext(i32) #0 {
define amdgpu_kernel void @test_call_external_void_func_i48_zeroext(i32) #0 {
; CHECK-LABEL: name: test_call_external_void_func_i48_zeroext
; CHECK: bb.1 (%ir-block.1):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48_zeroext
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i48_zeroext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1318,54 +1299,53 @@ define amdgpu_kernel void @test_call_external_void_func_i48_zeroext(i32) #0 {
define amdgpu_kernel void @test_call_external_void_func_p0_imm(ptr %arg) #0 {
; CHECK-LABEL: name: test_call_external_void_func_p0_imm
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.arg.kernarg.offset1, align 16, addrspace 4)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_p0
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p0)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1377,56 +1357,55 @@ define amdgpu_kernel void @test_call_external_void_func_p0_imm(ptr %arg) #0 {
define amdgpu_kernel void @test_call_external_void_func_v2p0() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v2p0
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p0>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x p0>) from `ptr addrspace(1) null`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2p0
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x p0>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1439,18 +1418,17 @@ define amdgpu_kernel void @test_call_external_void_func_v2p0() #0 {
define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v3i64
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
@@ -1459,24 +1437,24 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<3 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i64
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C2]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHUF]](<3 x s64>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -1485,16 +1463,16 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32)
; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1509,18 +1487,17 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v4i64
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 17179869187
@@ -1529,24 +1506,24 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2, 3)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i64
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C4]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C5]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHUF]](<4 x s64>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -1557,16 +1534,16 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32)
; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32)
; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1580,52 +1557,51 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
; CHECK-LABEL: name: test_call_external_void_func_f16_imm
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4400
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f16
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16)
; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1637,51 +1613,50 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
; CHECK-LABEL: name: test_call_external_void_func_f32_imm
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f32
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1693,55 +1668,54 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v2f32_imm
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f32
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C2]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1753,57 +1727,56 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v3f32_imm
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3f32
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C4]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C5]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1815,18 +1788,17 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v5f32_imm
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
@@ -1835,24 +1807,24 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v5f32
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C5]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C5]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C6]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C6]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C7]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C7]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -1860,16 +1832,16 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v5f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1881,53 +1853,52 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
; CHECK-LABEL: name: test_call_external_void_func_f64_imm
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f64
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1939,57 +1910,56 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v2f64_imm
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f64
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C2]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2001,42 +1971,41 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v3f64_imm
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_FCONSTANT double 8.000000e+00
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64), [[C2]](s64)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3f64
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C4]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C5]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s64>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -2045,16 +2014,16 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32)
; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2066,52 +2035,51 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v2i16
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s16>) from `ptr addrspace(1) undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i16
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2124,57 +2092,56 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v3i16
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<3 x s16>) from `ptr addrspace(1) undef`, align 8, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i16
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF2]](s16)
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF3]](s16)
; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>)
; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>)
; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2187,57 +2154,56 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v3f16
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<3 x s16>) from `ptr addrspace(1) undef`, align 8, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3f16
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF2]](s16)
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF3]](s16)
; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>)
; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>)
; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3f16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2250,54 +2216,53 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v4i16
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<4 x s16>) from `ptr addrspace(1) undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i16
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2310,18 +2275,17 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v4i16_imm
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 3
@@ -2329,38 +2293,38 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16), [[C2]](s16), [[C3]](s16)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i16
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C4]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C4]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C5]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C5]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C6]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C6]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2372,58 +2336,57 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v5i16
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<5 x s16>) from `ptr addrspace(1) undef`, align 16, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v5i16
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<5 x s16>)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[DEF2]](s16)
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[DEF3]](s16)
; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<6 x s16>)
; CHECK-NEXT: $vgpr0 = COPY [[UV5]](<2 x s16>)
; CHECK-NEXT: $vgpr1 = COPY [[UV6]](<2 x s16>)
; CHECK-NEXT: $vgpr2 = COPY [[UV7]](<2 x s16>)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v5i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2436,59 +2399,58 @@ define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 {
define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v7i16
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<7 x s16>) from `ptr addrspace(1) undef`, align 16, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v7i16
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<7 x s16>)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF2]](s16)
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF3]](s16)
; CHECK-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s16>)
; CHECK-NEXT: $vgpr0 = COPY [[UV7]](<2 x s16>)
; CHECK-NEXT: $vgpr1 = COPY [[UV8]](<2 x s16>)
; CHECK-NEXT: $vgpr2 = COPY [[UV9]](<2 x s16>)
; CHECK-NEXT: $vgpr3 = COPY [[UV10]](<2 x s16>)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v7i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2501,44 +2463,43 @@ define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 {
define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v63i16
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<63 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<63 x s16>) from `ptr addrspace(1) undef`, align 128, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v63i16
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<63 x s16>)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<64 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[DEF2]](s16)
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<64 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[DEF3]](s16)
; CHECK-NEXT: [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>), [[UV66:%[0-9]+]]:_(<2 x s16>), [[UV67:%[0-9]+]]:_(<2 x s16>), [[UV68:%[0-9]+]]:_(<2 x s16>), [[UV69:%[0-9]+]]:_(<2 x s16>), [[UV70:%[0-9]+]]:_(<2 x s16>), [[UV71:%[0-9]+]]:_(<2 x s16>), [[UV72:%[0-9]+]]:_(<2 x s16>), [[UV73:%[0-9]+]]:_(<2 x s16>), [[UV74:%[0-9]+]]:_(<2 x s16>), [[UV75:%[0-9]+]]:_(<2 x s16>), [[UV76:%[0-9]+]]:_(<2 x s16>), [[UV77:%[0-9]+]]:_(<2 x s16>), [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>), [[UV80:%[0-9]+]]:_(<2 x s16>), [[UV81:%[0-9]+]]:_(<2 x s16>), [[UV82:%[0-9]+]]:_(<2 x s16>), [[UV83:%[0-9]+]]:_(<2 x s16>), [[UV84:%[0-9]+]]:_(<2 x s16>), [[UV85:%[0-9]+]]:_(<2 x s16>), [[UV86:%[0-9]+]]:_(<2 x s16>), [[UV87:%[0-9]+]]:_(<2 x s16>), [[UV88:%[0-9]+]]:_(<2 x s16>), [[UV89:%[0-9]+]]:_(<2 x s16>), [[UV90:%[0-9]+]]:_(<2 x s16>), [[UV91:%[0-9]+]]:_(<2 x s16>), [[UV92:%[0-9]+]]:_(<2 x s16>), [[UV93:%[0-9]+]]:_(<2 x s16>), [[UV94:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<64 x s16>)
; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
@@ -2575,16 +2536,16 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 {
; CHECK-NEXT: $vgpr28 = COPY [[UV91]](<2 x s16>)
; CHECK-NEXT: $vgpr29 = COPY [[UV92]](<2 x s16>)
; CHECK-NEXT: $vgpr30 = COPY [[UV93]](<2 x s16>)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v63i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
@@ -2597,44 +2558,43 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 {
define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v65i16
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<65 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<65 x s16>) from `ptr addrspace(1) undef`, align 256, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v65i16
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16), [[UV63:%[0-9]+]]:_(s16), [[UV64:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<65 x s16>)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<66 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16), [[DEF2]](s16)
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<66 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16), [[DEF3]](s16)
; CHECK-NEXT: [[UV65:%[0-9]+]]:_(<2 x s16>), [[UV66:%[0-9]+]]:_(<2 x s16>), [[UV67:%[0-9]+]]:_(<2 x s16>), [[UV68:%[0-9]+]]:_(<2 x s16>), [[UV69:%[0-9]+]]:_(<2 x s16>), [[UV70:%[0-9]+]]:_(<2 x s16>), [[UV71:%[0-9]+]]:_(<2 x s16>), [[UV72:%[0-9]+]]:_(<2 x s16>), [[UV73:%[0-9]+]]:_(<2 x s16>), [[UV74:%[0-9]+]]:_(<2 x s16>), [[UV75:%[0-9]+]]:_(<2 x s16>), [[UV76:%[0-9]+]]:_(<2 x s16>), [[UV77:%[0-9]+]]:_(<2 x s16>), [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>), [[UV80:%[0-9]+]]:_(<2 x s16>), [[UV81:%[0-9]+]]:_(<2 x s16>), [[UV82:%[0-9]+]]:_(<2 x s16>), [[UV83:%[0-9]+]]:_(<2 x s16>), [[UV84:%[0-9]+]]:_(<2 x s16>), [[UV85:%[0-9]+]]:_(<2 x s16>), [[UV86:%[0-9]+]]:_(<2 x s16>), [[UV87:%[0-9]+]]:_(<2 x s16>), [[UV88:%[0-9]+]]:_(<2 x s16>), [[UV89:%[0-9]+]]:_(<2 x s16>), [[UV90:%[0-9]+]]:_(<2 x s16>), [[UV91:%[0-9]+]]:_(<2 x s16>), [[UV92:%[0-9]+]]:_(<2 x s16>), [[UV93:%[0-9]+]]:_(<2 x s16>), [[UV94:%[0-9]+]]:_(<2 x s16>), [[UV95:%[0-9]+]]:_(<2 x s16>), [[UV96:%[0-9]+]]:_(<2 x s16>), [[UV97:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<66 x s16>)
; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
@@ -2674,16 +2634,16 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 {
; CHECK-NEXT: $vgpr28 = COPY [[UV93]](<2 x s16>)
; CHECK-NEXT: $vgpr29 = COPY [[UV94]](<2 x s16>)
; CHECK-NEXT: $vgpr30 = COPY [[UV95]](<2 x s16>)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v65i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc
@@ -2696,40 +2656,39 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 {
define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v66i16
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<66 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<66 x s16>) from `ptr addrspace(1) undef`, align 256, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v66i16
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<66 x s16>)
; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
@@ -2770,16 +2729,16 @@ define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 {
; CHECK-NEXT: $vgpr28 = COPY [[UV28]](<2 x s16>)
; CHECK-NEXT: $vgpr29 = COPY [[UV29]](<2 x s16>)
; CHECK-NEXT: $vgpr30 = COPY [[UV30]](<2 x s16>)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v66i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc
@@ -2792,52 +2751,51 @@ define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 {
define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v2f16
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s16>) from `ptr addrspace(1) undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f16
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2850,54 +2808,53 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v2i32
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s32>) from `ptr addrspace(1) undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i32
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2910,55 +2867,54 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v2i32_imm
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i32
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C2]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2970,18 +2926,17 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
; CHECK-LABEL: name: test_call_external_void_func_v3i32_imm
; CHECK: bb.1 (%ir-block.1):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
@@ -2989,39 +2944,39 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i32
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C4]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C5]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -3033,18 +2988,17 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
; CHECK-LABEL: name: test_call_external_void_func_v3i32_i32
; CHECK: bb.1 (%ir-block.1):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
@@ -3053,40 +3007,40 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i32_i32
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C4]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C4]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C5]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C5]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C6]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C6]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
; CHECK-NEXT: $vgpr3 = COPY [[C3]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -3098,56 +3052,55 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v4i32
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<4 x s32>) from `ptr addrspace(1) undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i32
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -3160,18 +3113,17 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v4i32_imm
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
@@ -3179,40 +3131,40 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i32
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C4]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C4]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C5]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C5]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C6]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C6]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -3224,18 +3176,17 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v5i32_imm
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
@@ -3244,24 +3195,24 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v5i32
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C5]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C5]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C6]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C6]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C7]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C7]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -3269,16 +3220,16 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v5i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -3290,41 +3241,40 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v8i32
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<8 x s32>) from %ir.ptr, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i32
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -3335,16 +3285,16 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32)
; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32)
; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -3358,18 +3308,17 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v8i32_imm
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
@@ -3381,24 +3330,24 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32), [[C5]](s32), [[C6]](s32), [[C7]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i32
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C8]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C8]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C9]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C9]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C10]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C10]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -3409,16 +3358,16 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32)
; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32)
; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -3430,41 +3379,40 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v16i32
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<16 x s32>) from %ir.ptr, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v16i32
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -3483,16 +3431,16 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32)
; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32)
; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v16i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -3506,41 +3454,40 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v32i32
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
@@ -3578,16 +3525,16 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32)
; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32)
; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
@@ -3601,18 +3548,17 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
; CHECK-LABEL: name: test_call_external_void_func_v32i32_i32
; CHECK: bb.1 (%ir-block.1):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
@@ -3621,24 +3567,24 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s32) from `ptr addrspace(1) undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i32
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
@@ -3679,16 +3625,16 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32)
; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32)
; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF2]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF3]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc
@@ -3703,18 +3649,17 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v32i32_i8_i8_i16
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
@@ -3723,24 +3668,24 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i8_i8_i16
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
@@ -3751,10 +3696,10 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32)
; CHECK-NEXT: G_STORE [[ANYEXT]](s16), [[PTR_ADD2]](p5) :: (store (s16) into stack + 4, align 4, addrspace 5)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s16) = COPY [[ANYEXT]](s16)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s16) = COPY [[ANYEXT]](s16)
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C5]](s32)
- ; CHECK-NEXT: G_STORE [[COPY20]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5)
+ ; CHECK-NEXT: G_STORE [[COPY18]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5)
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32)
; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[PTR_ADD4]](p5) :: (store (s16) into stack + 12, align 4, addrspace 5)
@@ -3789,16 +3734,16 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32)
; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32)
; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF2]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF3]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i8_i8_i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 16, implicit-def $scc
@@ -3815,18 +3760,17 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v32i32_p3_p5
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
@@ -3835,24 +3779,24 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 {
; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(p5) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (p5) from `ptr addrspace(1) undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_p3_p5
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
@@ -3896,16 +3840,16 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 {
; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32)
; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32)
; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF2]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF3]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32_p3_p5, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc
@@ -3921,18 +3865,17 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 {
define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
; CHECK-LABEL: name: test_call_external_void_func_struct_i8_i32
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (s8) from %ir.ptr0, align 4, addrspace 1)
@@ -3941,39 +3884,39 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: ("amdgpu-noclobber" load (s32) from %ir.ptr0 + 4, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_struct_i8_i32
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -4039,18 +3982,17 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #
define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 {
; CHECK-LABEL: name: test_call_external_void_func_byval_struct_i8_i32
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.val
@@ -4060,40 +4002,40 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0
; CHECK-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_byval_struct_i8_i32
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C4]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C5]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32)
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK-NEXT: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.val, align 4, addrspace 5)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_byval_struct_i8_i32, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc
@@ -4121,7 +4063,7 @@ define void @call_byval_3ai32_byval_i8_align32(ptr addrspace(5) %incoming0, ptr
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p5) = COPY $vgpr0
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p5) = COPY $vgpr1
@@ -4129,7 +4071,7 @@ define void @call_byval_3ai32_byval_i8_align32(ptr addrspace(5) %incoming0, ptr
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_byval_a3i32_byval_i8_align32
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -4181,13 +4123,13 @@ define void @call_byval_a4i64_align4_higher_source_align(ptr addrspace(5) align
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p5) = COPY $vgpr0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_byval_a4i64_align4
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -4221,41 +4163,40 @@ define void @call_byval_a4i64_align4_higher_source_align(ptr addrspace(5) align
define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v2i8
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<2 x s8>) from %ir.ptr, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i8
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<2 x s8>)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8)
@@ -4264,16 +4205,16 @@ define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 {
; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT2]](s32)
; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16)
; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT3]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -4287,41 +4228,40 @@ define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 {
define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v3i8
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<3 x s8>) from %ir.ptr, align 4, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i8
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<3 x s8>)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8)
@@ -4333,16 +4273,16 @@ define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 {
; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT4]](s32)
; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16)
; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT5]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -4356,41 +4296,40 @@ define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 {
define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v4i8
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<4 x s8>) from %ir.ptr, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i8
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8)
@@ -4405,16 +4344,16 @@ define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 {
; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT6]](s32)
; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16)
; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT7]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -4428,41 +4367,40 @@ define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 {
define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v8i8
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<8 x s8>) from %ir.ptr, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i8
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<8 x s8>)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8)
@@ -4489,16 +4427,16 @@ define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 {
; CHECK-NEXT: $vgpr6 = COPY [[ANYEXT14]](s32)
; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16)
; CHECK-NEXT: $vgpr7 = COPY [[ANYEXT15]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v8i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -4512,41 +4450,40 @@ define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 {
define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v16i8
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<16 x s8>) from %ir.ptr, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v16i8
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8)
@@ -4597,16 +4534,16 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
; CHECK-NEXT: $vgpr14 = COPY [[ANYEXT30]](s32)
; CHECK-NEXT: [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT15]](s16)
; CHECK-NEXT: $vgpr15 = COPY [[ANYEXT31]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v16i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -4620,18 +4557,17 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
; CHECK-LABEL: name: stack_passed_arg_alignment_v32i32_f64
; CHECK: bb.1.entry:
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<32 x s32>) from %ir.val.kernarg.offset1, align 16, addrspace 4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
@@ -4639,24 +4575,24 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64) from %ir.tmp.kernarg.offset, align 16, addrspace 4)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @stack_passed_f64_arg
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 136
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<32 x s32>)
; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
@@ -4701,16 +4637,16 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val
; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32)
; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32)
; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @stack_passed_f64_arg, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc
@@ -4732,7 +4668,7 @@ define void @stack_12xv3i32() #0 {
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32)
@@ -4765,7 +4701,7 @@ define void @stack_12xv3i32() #0 {
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3i32
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -4875,7 +4811,7 @@ define void @stack_12xv3f32() #0 {
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32)
@@ -4908,7 +4844,7 @@ define void @stack_12xv3f32() #0 {
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3f32
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -5018,7 +4954,7 @@ define void @stack_8xv5i32() #0 {
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
@@ -5047,7 +4983,7 @@ define void @stack_8xv5i32() #0 {
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5i32
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -5161,7 +5097,7 @@ define void @stack_8xv5f32() #0 {
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
@@ -5190,7 +5126,7 @@ define void @stack_8xv5f32() #0 {
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5f32
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -5330,3 +5266,6 @@ main_body:
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind noinline }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll
index 21e280e..ab40707 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll
@@ -7,9 +7,9 @@
define amdgpu_kernel void @constant_fold_vector_add() {
; CHECK-LABEL: name: constant_fold_vector_add
; CHECK: bb.1.entry:
- ; CHECK-NEXT: liveins: $sgpr8_sgpr9
+ ; CHECK-NEXT: liveins: $sgpr6_sgpr7
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
@@ -22,3 +22,6 @@ entry:
store <4 x i64> %add, ptr addrspace(1) null, align 32
ret void
}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
index 4eba84f..0fb13bf9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
@@ -4,50 +4,49 @@
define amdgpu_kernel void @test_indirect_call_sgpr_ptr(ptr %fptr) {
; CHECK-LABEL: name: test_indirect_call_sgpr_ptr
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.fptr.kernarg.offset1, align 16, addrspace 4)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[LOAD]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -73,3 +72,6 @@ define amdgpu_gfx void @test_gfx_indirect_call_sgpr_ptr(ptr %fptr) {
call amdgpu_gfx void %fptr()
ret void
}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
index bb37e54..ceff84e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
@@ -4,10 +4,10 @@
define amdgpu_kernel void @asm_convergent() convergent{
; CHECK-LABEL: name: asm_convergent
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr8_sgpr9
+ ; CHECK-NEXT: liveins: $sgpr6_sgpr7
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: INLINEASM &s_barrier, 33 /* sideeffect isconvergent attdialect */, !0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: INLINEASM &s_barrier, 33 /* sideeffect isconvergent attdialect */, !1
; CHECK-NEXT: S_ENDPGM 0
call void asm sideeffect "s_barrier", ""() convergent, !srcloc !0
ret void
@@ -16,11 +16,11 @@ define amdgpu_kernel void @asm_convergent() convergent{
define amdgpu_kernel void @asm_simple_memory_clobber() {
; CHECK-LABEL: name: asm_simple_memory_clobber
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr8_sgpr9
+ ; CHECK-NEXT: liveins: $sgpr6_sgpr7
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, !0
- ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, !0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, !1
+ ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, !1
; CHECK-NEXT: S_ENDPGM 0
call void asm sideeffect "", "~{memory}"(), !srcloc !0
call void asm sideeffect "", ""(), !srcloc !0
@@ -30,10 +30,10 @@ define amdgpu_kernel void @asm_simple_memory_clobber() {
define amdgpu_kernel void @asm_simple_vgpr_clobber() {
; CHECK-LABEL: name: asm_simple_vgpr_clobber
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr8_sgpr9
+ ; CHECK-NEXT: liveins: $sgpr6_sgpr7
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $vgpr0, !0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $vgpr0, !1
; CHECK-NEXT: S_ENDPGM 0
call void asm sideeffect "v_mov_b32 v0, 7", "~{v0}"(), !srcloc !0
ret void
@@ -42,10 +42,10 @@ define amdgpu_kernel void @asm_simple_vgpr_clobber() {
define amdgpu_kernel void @asm_simple_sgpr_clobber() {
; CHECK-LABEL: name: asm_simple_sgpr_clobber
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr8_sgpr9
+ ; CHECK-NEXT: liveins: $sgpr6_sgpr7
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $sgpr0, !0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $sgpr0, !1
; CHECK-NEXT: S_ENDPGM 0
call void asm sideeffect "s_mov_b32 s0, 7", "~{s0}"(), !srcloc !0
ret void
@@ -54,10 +54,10 @@ define amdgpu_kernel void @asm_simple_sgpr_clobber() {
define amdgpu_kernel void @asm_simple_agpr_clobber() {
; CHECK-LABEL: name: asm_simple_agpr_clobber
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr8_sgpr9
+ ; CHECK-NEXT: liveins: $sgpr6_sgpr7
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: INLINEASM &"; def a0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $agpr0, !0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: INLINEASM &"; def a0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $agpr0, !1
; CHECK-NEXT: S_ENDPGM 0
call void asm sideeffect "; def a0", "~{a0}"(), !srcloc !0
ret void
@@ -66,9 +66,9 @@ define amdgpu_kernel void @asm_simple_agpr_clobber() {
define i32 @asm_vgpr_early_clobber() {
; CHECK-LABEL: name: asm_vgpr_early_clobber
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %8, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %9, !0
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9
+ ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %7, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %8, !1
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %7
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %8
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
@@ -94,8 +94,8 @@ entry:
define i32 @test_single_vgpr_output() nounwind {
; CHECK-LABEL: name: test_single_vgpr_output
; CHECK: bb.1.entry:
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %8
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
+ ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %7
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %7
; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
entry:
@@ -106,8 +106,8 @@ entry:
define i32 @test_single_sgpr_output_s32() nounwind {
; CHECK-LABEL: name: test_single_sgpr_output_s32
; CHECK: bb.1.entry:
- ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2359306 /* regdef:SReg_32 */, def %8
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
+ ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2359306 /* regdef:SReg_32 */, def %7
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %7
; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
entry:
@@ -119,9 +119,9 @@ entry:
define float @test_multiple_register_outputs_same() #0 {
; CHECK-LABEL: name: test_multiple_register_outputs_same
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %8, 2228234 /* regdef:VGPR_32 */, def %9
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9
+ ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %7, 2228234 /* regdef:VGPR_32 */, def %8
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %7
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %8
; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]]
; CHECK-NEXT: $vgpr0 = COPY [[FADD]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
@@ -136,9 +136,9 @@ define float @test_multiple_register_outputs_same() #0 {
define double @test_multiple_register_outputs_mixed() #0 {
; CHECK-LABEL: name: test_multiple_register_outputs_mixed
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %8, 3538954 /* regdef:VReg_64 */, def %9
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %9
+ ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %7, 3538954 /* regdef:VReg_64 */, def %8
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %7
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %8
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
@@ -166,9 +166,9 @@ define float @test_vector_output() nounwind {
define amdgpu_kernel void @test_input_vgpr_imm() {
; CHECK-LABEL: name: test_input_vgpr_imm
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr8_sgpr9
+ ; CHECK-NEXT: liveins: $sgpr6_sgpr7
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[C]](s32)
; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY1]]
@@ -180,9 +180,9 @@ define amdgpu_kernel void @test_input_vgpr_imm() {
define amdgpu_kernel void @test_input_sgpr_imm() {
; CHECK-LABEL: name: test_input_sgpr_imm
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr8_sgpr9
+ ; CHECK-NEXT: liveins: $sgpr6_sgpr7
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[C]](s32)
; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[COPY1]]
@@ -194,9 +194,9 @@ define amdgpu_kernel void @test_input_sgpr_imm() {
define amdgpu_kernel void @test_input_imm() {
; CHECK-LABEL: name: test_input_imm
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr8_sgpr9
+ ; CHECK-NEXT: liveins: $sgpr6_sgpr7
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 9 /* sideeffect mayload attdialect */, 13 /* imm */, 42
; CHECK-NEXT: INLINEASM &"s_mov_b64 s[0:1], $0", 9 /* sideeffect mayload attdialect */, 13 /* imm */, 42
; CHECK-NEXT: S_ENDPGM 0
@@ -212,8 +212,8 @@ define float @test_input_vgpr(i32 %src) nounwind {
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32)
- ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %9, 2228233 /* reguse:VGPR_32 */, [[COPY1]]
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %9
+ ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %8, 2228233 /* reguse:VGPR_32 */, [[COPY1]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %8
; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
entry:
@@ -227,8 +227,8 @@ define i32 @test_memory_constraint(ptr addrspace(3) %a) nounwind {
; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 2228234 /* regdef:VGPR_32 */, def %9, 262158 /* mem:m */, [[COPY]](p3)
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9
+ ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 2228234 /* regdef:VGPR_32 */, def %8, 262158 /* mem:m */, [[COPY]](p3)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %8
; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
%1 = tail call i32 asm "ds_read_b32 $0, $1", "=v,*m"(ptr addrspace(3) elementtype(i32) %a)
@@ -244,8 +244,8 @@ define i32 @test_vgpr_matching_constraint(i32 %a) nounwind {
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[AND]](s32)
- ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 2228234 /* regdef:VGPR_32 */, def %11, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %11
+ ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 2228234 /* regdef:VGPR_32 */, def %10, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %10
; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
%and = and i32 %a, 1
@@ -256,14 +256,14 @@ define i32 @test_vgpr_matching_constraint(i32 %a) nounwind {
define i32 @test_sgpr_matching_constraint() nounwind {
; CHECK-LABEL: name: test_sgpr_matching_constraint
; CHECK: bb.1.entry:
- ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2359306 /* regdef:SReg_32 */, def %8
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
- ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 2359306 /* regdef:SReg_32 */, def %10
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %10
+ ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2359306 /* regdef:SReg_32 */, def %7
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %7
+ ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 2359306 /* regdef:SReg_32 */, def %9
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]](s32)
- ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 2359306 /* regdef:SReg_32 */, def %12, 2359305 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3)
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY %12
+ ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 2359306 /* regdef:SReg_32 */, def %11, 2359305 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY %11
; CHECK-NEXT: $vgpr0 = COPY [[COPY4]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
entry:
@@ -285,10 +285,10 @@ define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind {
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32)
- ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 2228234 /* regdef:VGPR_32 */, def %11, 2228234 /* regdef:VGPR_32 */, def %12, 2228234 /* regdef:VGPR_32 */, def %13, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5)
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY %11
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %12
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %13
+ ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 2228234 /* regdef:VGPR_32 */, def %10, 2228234 /* regdef:VGPR_32 */, def %11, 2228234 /* regdef:VGPR_32 */, def %12, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5)
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY %10
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %11
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %12
; CHECK-NEXT: G_STORE [[COPY6]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; CHECK-NEXT: G_STORE [[COPY7]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; CHECK-NEXT: G_STORE [[COPY8]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
@@ -306,11 +306,11 @@ define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind {
define i32 @test_sgpr_to_vgpr_move_matching_constraint() nounwind {
; CHECK-LABEL: name: test_sgpr_to_vgpr_move_matching_constraint
; CHECK: bb.1.entry:
- ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2359306 /* regdef:SReg_32 */, def %8
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
+ ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2359306 /* regdef:SReg_32 */, def %7
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %7
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32)
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %10, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %10
+ ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %9, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %9
; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
entry:
@@ -322,13 +322,15 @@ entry:
define amdgpu_kernel void @asm_constraint_n_n() {
; CHECK-LABEL: name: asm_constraint_n_n
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr8_sgpr9
+ ; CHECK-NEXT: liveins: $sgpr6_sgpr7
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: INLINEASM &"s_trap ${0:n}", 1 /* sideeffect attdialect */, 13 /* imm */, 10
; CHECK-NEXT: S_ENDPGM 0
tail call void asm sideeffect "s_trap ${0:n}", "n"(i32 10) #1
ret void
}
+!llvm.module.flags = !{!1}
!0 = !{i32 70}
+!1 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
index cc4796e..b45b307 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
@@ -816,7 +816,7 @@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -933,7 +933,7 @@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
; GCN-NEXT: G_STORE [[C1]](s64), [[PTR_ADD2]](p5) :: (store (s64) into %ir.alloca1 + 8, addrspace 5)
; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @void_fastcc_multi_byval
; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GCN-NEXT: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -984,7 +984,7 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64
; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1097,7 +1097,7 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64
; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into %ir.alloca + 8, addrspace 5)
; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @void_fastcc_byval_and_stack_passed
; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GCN-NEXT: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -1176,14 +1176,14 @@ define hidden fastcc i64 @sibling_call_i64_fastcc_i64(i64 %a) #1 {
; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32)
; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i64_fastcc_i64
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -1225,14 +1225,14 @@ define hidden fastcc ptr addrspace(1) @sibling_call_p1i8_fastcc_p1i8(ptr addrspa
; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32)
; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @p1i8_fastcc_p1i8
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -1274,13 +1274,13 @@ define hidden fastcc i16 @sibling_call_i16_fastcc_i16(i16 %a) #1 {
; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i16_fastcc_i16
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -1321,13 +1321,13 @@ define hidden fastcc half @sibling_call_f16_fastcc_f16(half %a) #1 {
; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @f16_fastcc_f16
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -1368,7 +1368,7 @@ define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1
; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1377,7 +1377,7 @@ define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1
; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16)
; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @v3i16_fastcc_v3i16
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -1422,14 +1422,14 @@ define hidden fastcc <4 x i16> @sibling_call_v4i16_fastcc_v4i16(<4 x i16> %a) #1
; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>)
; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @v4i16_fastcc_v4i16
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -1471,7 +1471,7 @@ define hidden fastcc <2 x i64> @sibling_call_v2i64_fastcc_v2i64(<2 x i64> %a) #1
; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1482,7 +1482,7 @@ define hidden fastcc <2 x i64> @sibling_call_v2i64_fastcc_v2i64(<2 x i64> %a) #1
; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @v2i64_fastcc_v2i64
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -1514,3 +1514,6 @@ entry:
attributes #0 = { nounwind }
attributes #1 = { nounwind noinline }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll
index edcb462..1ead1e4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll
@@ -15,11 +15,11 @@ define void @tail_call_void_func_void() {
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @external_void_func_void
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -42,3 +42,6 @@ define void @tail_call_void_func_void() {
tail call void @external_void_func_void()
ret void
}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
index 387c5c7..a0f54bf 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
@@ -9,7 +9,8 @@
define amdgpu_kernel void @is_private_vgpr(ptr addrspace(1) %ptr.ptr) {
; CI-LABEL: is_private_vgpr:
; CI: ; %bb.0:
-; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
+; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; CI-NEXT: s_load_dword s2, s[4:5], 0x32
; CI-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: v_mov_b32_e32 v0, s0
@@ -18,9 +19,7 @@ define amdgpu_kernel void @is_private_vgpr(ptr addrspace(1) %ptr.ptr) {
; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; CI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc
; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: s_load_dword s0, s[4:5], 0x11
-; CI-NEXT: s_waitcnt lgkmcnt(0)
-; CI-NEXT: v_cmp_eq_u32_e32 vcc, s0, v1
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, s2, v1
; CI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; CI-NEXT: flat_store_dword v[0:1], v0
; CI-NEXT: s_endpgm
@@ -79,9 +78,9 @@ define amdgpu_kernel void @is_private_vgpr(ptr addrspace(1) %ptr.ptr) {
define amdgpu_kernel void @is_private_sgpr(ptr %ptr) {
; CI-LABEL: is_private_sgpr:
; CI: ; %bb.0:
-; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
+; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; CI-NEXT: s_waitcnt lgkmcnt(0)
-; CI-NEXT: s_load_dword s0, s[4:5], 0x11
+; CI-NEXT: s_load_dword s0, s[4:5], 0x32
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: s_cmp_lg_u32 s1, s0
; CI-NEXT: s_cbranch_scc1 .LBB1_2
@@ -150,3 +149,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i1 @llvm.amdgcn.is.private(ptr nocapture) #0
attributes #0 = { nounwind readnone speculatable }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
index 75f1fea..4b3a475 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
@@ -9,7 +9,8 @@
define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
; CI-LABEL: is_local_vgpr:
; CI: ; %bb.0:
-; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
+; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; CI-NEXT: s_load_dword s2, s[4:5], 0x33
; CI-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: v_mov_b32_e32 v0, s0
@@ -18,9 +19,7 @@ define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; CI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc
; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: s_load_dword s0, s[4:5], 0x10
-; CI-NEXT: s_waitcnt lgkmcnt(0)
-; CI-NEXT: v_cmp_eq_u32_e32 vcc, s0, v1
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, s2, v1
; CI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; CI-NEXT: flat_store_dword v[0:1], v0
; CI-NEXT: s_endpgm
@@ -79,9 +78,9 @@ define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
define amdgpu_kernel void @is_local_sgpr(ptr %ptr) {
; CI-LABEL: is_local_sgpr:
; CI: ; %bb.0:
-; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
+; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; CI-NEXT: s_waitcnt lgkmcnt(0)
-; CI-NEXT: s_load_dword s0, s[4:5], 0x10
+; CI-NEXT: s_load_dword s0, s[4:5], 0x33
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: s_cmp_lg_u32 s1, s0
; CI-NEXT: s_cbranch_scc1 .LBB1_2
@@ -150,3 +149,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i1 @llvm.amdgcn.is.shared(ptr nocapture) #0
attributes #0 = { nounwind readnone speculatable }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
index fec564f..e3779ec 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
@@ -75,8 +75,8 @@ bb.2:
store volatile i32 0, ptr addrspace(1) undef
ret void
}
-; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 4112
-; DEFAULTSIZE: ; ScratchSize: 4112
+; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 16
+; DEFAULTSIZE: ; ScratchSize: 16
; ASSUME1024: .amdhsa_private_segment_fixed_size 1040
; ASSUME1024: ; ScratchSize: 1040
@@ -137,8 +137,8 @@ bb.1:
ret void
}
-; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 4160
-; DEFAULTSIZE: ; ScratchSize: 4160
+; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 64
+; DEFAULTSIZE: ; ScratchSize: 64
; ASSUME1024: .amdhsa_private_segment_fixed_size 1088
; ASSUME1024: ; ScratchSize: 1088
@@ -265,3 +265,9 @@ bb.1:
declare i32 @llvm.amdgcn.workitem.id.x() #0
attributes #0 = { nounwind readnone speculatable }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; ASSUME1024: {{.*}}
+; DEFAULTSIZE: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
index 0897993..b356b9a 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
@@ -11,10 +11,10 @@ declare void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) nocapture, ptr addrspace(4)
@global.arr = unnamed_addr addrspace(1) global [256 x i32] undef, align 4
;.
-; HSA: @[[LDS_I32:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(3) global i32 undef, align 4
-; HSA: @[[LDS_ARR:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(3) global [256 x i32] undef, align 4
-; HSA: @[[GLOBAL_I32:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(1) global i32 undef, align 4
-; HSA: @[[GLOBAL_ARR:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(1) global [256 x i32] undef, align 4
+; HSA: @lds.i32 = unnamed_addr addrspace(3) global i32 undef, align 4
+; HSA: @lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4
+; HSA: @global.i32 = unnamed_addr addrspace(1) global i32 undef, align 4
+; HSA: @global.arr = unnamed_addr addrspace(1) global [256 x i32] undef, align 4
;.
define amdgpu_kernel void @store_cast_0_flat_to_group_addrspacecast() #1 {
; HSA-LABEL: define {{[^@]+}}@store_cast_0_flat_to_group_addrspacecast
@@ -225,12 +225,19 @@ define ptr addrspace(3) @ret_constant_cast_group_gv_gep_to_flat_to_group() #1 {
attributes #0 = { argmemonly nounwind }
attributes #1 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
;.
; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
; AKF_HSA: attributes #[[ATTR1]] = { nounwind }
;.
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+;.
+; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdgpu_code_object_version", i32 500}
+;.
+; ATTRIBUTOR_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdgpu_code_object_version", i32 500}
;.
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll
index 313bd85..195c5da 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll
@@ -1,23 +1,27 @@
-; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
-; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
-; GFX11-LABEL: {{^}}lds_direct_load:
-; GFX11: s_mov_b32 m0
+; GCN-LABEL: {{^}}lds_direct_load:
+; GCN: s_mov_b32 m0
; GFX11: lds_direct_load v{{[0-9]+}}
-; GFX11: s_mov_b32 m0
+; GFX12: ds_direct_load v{{[0-9]+}}
+; GCN: s_mov_b32 m0
; GFX11: lds_direct_load v{{[0-9]+}}
-; GFX11: s_mov_b32 m0
+; GFX12: ds_direct_load v{{[0-9]+}}
+; GCN: s_mov_b32 m0
; GFX11: lds_direct_load v{{[0-9]+}}
-; GFX11: s_waitcnt expcnt(2)
-; GFX11: v_add_f32
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(1)
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(0)
-; GFX11: buffer_store_b32
-; GFX11: buffer_store_b32
-; GFX11: buffer_store_b32
-; GFX11: buffer_store_b32
+; GCN: s_waitcnt expcnt(2)
+; GCN: v_add_f32
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(1)
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(0)
+; GCN: buffer_store_b32
+; GCN: buffer_store_b32
+; GCN: buffer_store_b32
+; GCN: buffer_store_b32
define amdgpu_ps void @lds_direct_load(ptr addrspace(8) inreg %buf, i32 inreg %arg0,
i32 inreg %arg1, i32 inreg %arg2) #0 {
main_body:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll
index 5a8b03e..1ab753d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll
@@ -1,25 +1,32 @@
-; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
-; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
-; GFX11-LABEL: {{^}}lds_param_load:
-; GFX11: s_mov_b32 m0
+; GCN-LABEL: {{^}}lds_param_load:
+; GCN: s_mov_b32 m0
; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.x
; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.y
; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.z
; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.w
; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr1.x
-; GFX11: s_waitcnt expcnt(4)
-; GFX11: v_add_f32
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(3)
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(2)
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(1)
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(0)
-; GFX11: buffer_store_b32
-; GFX11: buffer_store_b32
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.x
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.y
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.z
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.w
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr1.x
+; GCN: s_waitcnt expcnt(4)
+; GCN: v_add_f32
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(3)
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(2)
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(1)
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(0)
+; GCN: buffer_store_b32
+; GCN: buffer_store_b32
define amdgpu_ps void @lds_param_load(ptr addrspace(8) inreg %buf, i32 inreg %arg) #0 {
main_body:
%p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %arg)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll b/llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll
index c287789..dcf9b3f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll
@@ -1,42 +1,34 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX12-SDAG %s
+; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX12,GFX12-SDAG %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX11 %s
-; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX12-GISEL %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX12,GFX12-GISEL %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX11 %s
; Scalar data prefetch
define amdgpu_ps void @prefetch_data_sgpr(ptr addrspace(4) inreg %ptr) {
-; GFX12-SDAG-LABEL: prefetch_data_sgpr:
-; GFX12-SDAG: ; %bb.0: ; %entry
-; GFX12-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
-; GFX12-SDAG-NEXT: s_endpgm
+; GFX12-LABEL: prefetch_data_sgpr:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
+; GFX12-NEXT: s_endpgm
;
; GFX11-LABEL: prefetch_data_sgpr:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_endpgm
-;
-; GFX12-GISEL-LABEL: prefetch_data_sgpr:
-; GFX12-GISEL: ; %bb.0: ; %entry
-; GFX12-GISEL-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.p4(ptr addrspace(4) %ptr, i32 0, i32 0, i32 1)
ret void
}
define amdgpu_ps void @prefetch_data_sgpr_offset(ptr addrspace(4) inreg %ptr) {
-; GFX12-SDAG-LABEL: prefetch_data_sgpr_offset:
-; GFX12-SDAG: ; %bb.0: ; %entry
-; GFX12-SDAG-NEXT: s_prefetch_data s[0:1], 0x200, null, 0
-; GFX12-SDAG-NEXT: s_endpgm
+; GFX12-LABEL: prefetch_data_sgpr_offset:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_prefetch_data s[0:1], 0x200, null, 0
+; GFX12-NEXT: s_endpgm
;
; GFX11-LABEL: prefetch_data_sgpr_offset:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_endpgm
-;
-; GFX12-GISEL-LABEL: prefetch_data_sgpr_offset:
-; GFX12-GISEL: ; %bb.0: ; %entry
-; GFX12-GISEL-NEXT: s_endpgm
entry:
%gep = getelementptr float, ptr addrspace(4) %ptr, i32 128
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
@@ -46,18 +38,14 @@ entry:
; Check large offsets
define amdgpu_ps void @prefetch_data_sgpr_max_offset(ptr addrspace(4) inreg %ptr) {
-; GFX12-SDAG-LABEL: prefetch_data_sgpr_max_offset:
-; GFX12-SDAG: ; %bb.0: ; %entry
-; GFX12-SDAG-NEXT: s_prefetch_data s[0:1], 0x7fffff, null, 0
-; GFX12-SDAG-NEXT: s_endpgm
+; GFX12-LABEL: prefetch_data_sgpr_max_offset:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_prefetch_data s[0:1], 0x7fffff, null, 0
+; GFX12-NEXT: s_endpgm
;
; GFX11-LABEL: prefetch_data_sgpr_max_offset:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_endpgm
-;
-; GFX12-GISEL-LABEL: prefetch_data_sgpr_max_offset:
-; GFX12-GISEL: ; %bb.0: ; %entry
-; GFX12-GISEL-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388607
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
@@ -65,18 +53,14 @@ entry:
}
define amdgpu_ps void @prefetch_data_sgpr_min_offset(ptr addrspace(4) inreg %ptr) {
-; GFX12-SDAG-LABEL: prefetch_data_sgpr_min_offset:
-; GFX12-SDAG: ; %bb.0: ; %entry
-; GFX12-SDAG-NEXT: s_prefetch_data s[0:1], -0x800000, null, 0
-; GFX12-SDAG-NEXT: s_endpgm
+; GFX12-LABEL: prefetch_data_sgpr_min_offset:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_prefetch_data s[0:1], -0x800000, null, 0
+; GFX12-NEXT: s_endpgm
;
; GFX11-LABEL: prefetch_data_sgpr_min_offset:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_endpgm
-;
-; GFX12-GISEL-LABEL: prefetch_data_sgpr_min_offset:
-; GFX12-GISEL: ; %bb.0: ; %entry
-; GFX12-GISEL-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 -8388608
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
@@ -96,6 +80,9 @@ define amdgpu_ps void @prefetch_data_sgpr_too_large_offset(ptr addrspace(4) inre
;
; GFX12-GISEL-LABEL: prefetch_data_sgpr_too_large_offset:
; GFX12-GISEL: ; %bb.0: ; %entry
+; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000
+; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
+; GFX12-GISEL-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; GFX12-GISEL-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388608
@@ -137,55 +124,43 @@ entry:
; Check supported address spaces
define amdgpu_ps void @prefetch_data_sgpr_flat(ptr inreg %ptr) {
-; GFX12-SDAG-LABEL: prefetch_data_sgpr_flat:
-; GFX12-SDAG: ; %bb.0: ; %entry
-; GFX12-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
-; GFX12-SDAG-NEXT: s_endpgm
+; GFX12-LABEL: prefetch_data_sgpr_flat:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
+; GFX12-NEXT: s_endpgm
;
; GFX11-LABEL: prefetch_data_sgpr_flat:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_endpgm
-;
-; GFX12-GISEL-LABEL: prefetch_data_sgpr_flat:
-; GFX12-GISEL: ; %bb.0: ; %entry
-; GFX12-GISEL-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 0, i32 1)
ret void
}
define amdgpu_ps void @prefetch_data_sgpr_global(ptr addrspace(1) inreg %ptr) {
-; GFX12-SDAG-LABEL: prefetch_data_sgpr_global:
-; GFX12-SDAG: ; %bb.0: ; %entry
-; GFX12-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
-; GFX12-SDAG-NEXT: s_endpgm
+; GFX12-LABEL: prefetch_data_sgpr_global:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
+; GFX12-NEXT: s_endpgm
;
; GFX11-LABEL: prefetch_data_sgpr_global:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_endpgm
-;
-; GFX12-GISEL-LABEL: prefetch_data_sgpr_global:
-; GFX12-GISEL: ; %bb.0: ; %entry
-; GFX12-GISEL-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 0, i32 0, i32 1)
ret void
}
define amdgpu_ps void @prefetch_data_sgpr_constant_32bit(ptr addrspace(6) inreg %ptr) {
-; GFX12-SDAG-LABEL: prefetch_data_sgpr_constant_32bit:
-; GFX12-SDAG: ; %bb.0: ; %entry
-; GFX12-SDAG-NEXT: s_mov_b32 s1, 0
-; GFX12-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
-; GFX12-SDAG-NEXT: s_endpgm
+; GFX12-LABEL: prefetch_data_sgpr_constant_32bit:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_mov_b32 s1, 0
+; GFX12-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
+; GFX12-NEXT: s_endpgm
;
; GFX11-LABEL: prefetch_data_sgpr_constant_32bit:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_endpgm
-;
-; GFX12-GISEL-LABEL: prefetch_data_sgpr_constant_32bit:
-; GFX12-GISEL: ; %bb.0: ; %entry
-; GFX12-GISEL-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.p6(ptr addrspace(6) %ptr, i32 0, i32 0, i32 1)
ret void
@@ -194,36 +169,28 @@ entry:
; I$ prefetch
define amdgpu_ps void @prefetch_inst_sgpr(ptr addrspace(4) inreg %ptr) {
-; GFX12-SDAG-LABEL: prefetch_inst_sgpr:
-; GFX12-SDAG: ; %bb.0: ; %entry
-; GFX12-SDAG-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
-; GFX12-SDAG-NEXT: s_endpgm
+; GFX12-LABEL: prefetch_inst_sgpr:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
+; GFX12-NEXT: s_endpgm
;
; GFX11-LABEL: prefetch_inst_sgpr:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_endpgm
-;
-; GFX12-GISEL-LABEL: prefetch_inst_sgpr:
-; GFX12-GISEL: ; %bb.0: ; %entry
-; GFX12-GISEL-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.p4(ptr addrspace(4) %ptr, i32 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @prefetch_inst_sgpr_offset(ptr addrspace(4) inreg %ptr) {
-; GFX12-SDAG-LABEL: prefetch_inst_sgpr_offset:
-; GFX12-SDAG: ; %bb.0: ; %entry
-; GFX12-SDAG-NEXT: s_prefetch_inst s[0:1], 0x80, null, 0
-; GFX12-SDAG-NEXT: s_endpgm
+; GFX12-LABEL: prefetch_inst_sgpr_offset:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_prefetch_inst s[0:1], 0x80, null, 0
+; GFX12-NEXT: s_endpgm
;
; GFX11-LABEL: prefetch_inst_sgpr_offset:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_endpgm
-;
-; GFX12-GISEL-LABEL: prefetch_inst_sgpr_offset:
-; GFX12-GISEL: ; %bb.0: ; %entry
-; GFX12-GISEL-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 128
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
@@ -233,18 +200,14 @@ entry:
; Check large offsets
define amdgpu_ps void @prefetch_inst_sgpr_max_offset(ptr addrspace(4) inreg %ptr) {
-; GFX12-SDAG-LABEL: prefetch_inst_sgpr_max_offset:
-; GFX12-SDAG: ; %bb.0: ; %entry
-; GFX12-SDAG-NEXT: s_prefetch_inst s[0:1], 0x7fffff, null, 0
-; GFX12-SDAG-NEXT: s_endpgm
+; GFX12-LABEL: prefetch_inst_sgpr_max_offset:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_prefetch_inst s[0:1], 0x7fffff, null, 0
+; GFX12-NEXT: s_endpgm
;
; GFX11-LABEL: prefetch_inst_sgpr_max_offset:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_endpgm
-;
-; GFX12-GISEL-LABEL: prefetch_inst_sgpr_max_offset:
-; GFX12-GISEL: ; %bb.0: ; %entry
-; GFX12-GISEL-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388607
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
@@ -252,18 +215,14 @@ entry:
}
define amdgpu_ps void @prefetch_inst_sgpr_min_offset(ptr addrspace(4) inreg %ptr) {
-; GFX12-SDAG-LABEL: prefetch_inst_sgpr_min_offset:
-; GFX12-SDAG: ; %bb.0: ; %entry
-; GFX12-SDAG-NEXT: s_prefetch_inst s[0:1], -0x800000, null, 0
-; GFX12-SDAG-NEXT: s_endpgm
+; GFX12-LABEL: prefetch_inst_sgpr_min_offset:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_prefetch_inst s[0:1], -0x800000, null, 0
+; GFX12-NEXT: s_endpgm
;
; GFX11-LABEL: prefetch_inst_sgpr_min_offset:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_endpgm
-;
-; GFX12-GISEL-LABEL: prefetch_inst_sgpr_min_offset:
-; GFX12-GISEL: ; %bb.0: ; %entry
-; GFX12-GISEL-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 -8388608
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
@@ -283,6 +242,9 @@ define amdgpu_ps void @prefetch_inst_sgpr_too_large_offset(ptr addrspace(4) inre
;
; GFX12-GISEL-LABEL: prefetch_inst_sgpr_too_large_offset:
; GFX12-GISEL: ; %bb.0: ; %entry
+; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000
+; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
+; GFX12-GISEL-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; GFX12-GISEL-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388608
diff --git a/llvm/test/CodeGen/PowerPC/coalescer-remat-with-undef-implicit-def-operand.mir b/llvm/test/CodeGen/PowerPC/coalescer-remat-with-undef-implicit-def-operand.mir
deleted file mode 100644
index 8e4e3be..0000000
--- a/llvm/test/CodeGen/PowerPC/coalescer-remat-with-undef-implicit-def-operand.mir
+++ /dev/null
@@ -1,28 +0,0 @@
-# RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-coalescing -run-pass=register-coalescer \
-# RUN: -o - %s | FileCheck %s
----
-name: _Z13testTransposeIfLj31ELj17EEvv
-alignment: 16
-tracksRegLiveness: true
-frameInfo:
- maxAlignment: 128
-machineFunctionInfo: {}
-body: |
- ; CHECK-LABEL: name: _Z13testTransposeIfLj31ELj17EEvv
- ; CHECK: undef %[[REG:[0-9]+]].sub_64:vsrc = IMPLICIT_DEF implicit-def %[[REG]]
- bb.0:
- liveins: $x2
- %2:vssrc = IMPLICIT_DEF
- B %bb.2
-
- bb.1:
- %0:vsrc = SUBREG_TO_REG 1, killed %2, %subreg.sub_64
- %1:vsrc = XXPERMDI killed undef %0, killed %0, 0
- BLR8 implicit $lr8, implicit $rm
-
- bb.2:
- successors: %bb.2(0x7c000000), %bb.1(0x04000000)
- BDNZ8 %bb.2, implicit-def $ctr8, implicit $ctr8
- B %bb.1
-
-...
diff --git a/llvm/test/CodeGen/PowerPC/expand-isel-to-branch.ll b/llvm/test/CodeGen/PowerPC/expand-isel-to-branch.ll
new file mode 100644
index 0000000..6f3e9f7
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/expand-isel-to-branch.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix < %s | FileCheck %s
+
+define noundef signext i32 @ham(ptr nocapture noundef %arg) #0 {
+; CHECK-LABEL: ham:
+; CHECK: # %bb.0: # %bb
+; CHECK-NEXT: lwz 4, 0(3)
+; CHECK-NEXT: cmpwi 4, 750
+; CHECK-NEXT: addi 5, 4, 1
+; CHECK-NEXT: li 4, 1
+; CHECK-NEXT: bc 12, 0, L..BB0_1
+; CHECK-NEXT: b L..BB0_2
+; CHECK-NEXT: L..BB0_1: # %bb
+; CHECK-NEXT: addi 4, 5, 0
+; CHECK-NEXT: L..BB0_2: # %bb
+; CHECK-NEXT: stw 4, 0(3)
+; CHECK-NEXT: li 3, 0
+; CHECK-NEXT: blr
+bb:
+ %load = load i32, ptr %arg, align 4
+ %icmp = icmp slt i32 %load, 750
+ %add = add nsw i32 %load, 1
+ %select = select i1 %icmp, i32 %add, i32 1
+ store i32 %select, ptr %arg, align 4
+ ret i32 0
+}
+
+attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crbits,+crypto,+direct-move,+extdiv,+htm,+isa-v206-instructions,+isa-v207-instructions,+power8-vector,+quadword-atomics,+vsx,-aix-small-local-exec-tls,-isa-v30-instructions,-isel,-power9-vector,-privileged,-rop-protect,-spe" }
diff --git a/llvm/test/CodeGen/RISCV/mem.ll b/llvm/test/CodeGen/RISCV/mem.ll
index 3718ce8..7c98d4a 100644
--- a/llvm/test/CodeGen/RISCV/mem.ll
+++ b/llvm/test/CodeGen/RISCV/mem.ll
@@ -338,3 +338,28 @@ bb:
}
declare void @snork(ptr)
+
+define i8 @disjoint_or_lb(ptr %a, i32 %off) nounwind {
+; RV32I-LABEL: disjoint_or_lb:
+; RV32I: # %bb.0:
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: lbu a0, 3(a0)
+; RV32I-NEXT: ret
+ %b = or disjoint i32 %off, 3
+ %1 = getelementptr i8, ptr %a, i32 %b
+ %2 = load i8, ptr %1
+ ret i8 %2
+}
+
+define i32 @disjoint_or_lw(ptr %a, i32 %off) nounwind {
+; RV32I-LABEL: disjoint_or_lw:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a1, a1, 2
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: lw a0, 12(a0)
+; RV32I-NEXT: ret
+ %b = or disjoint i32 %off, 3
+ %1 = getelementptr i32, ptr %a, i32 %b
+ %2 = load i32, ptr %1
+ ret i32 %2
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
index fc94f8c..47d65c2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
@@ -1231,17 +1231,16 @@ define <vscale x 1 x i64> @ctlz_nxv1i64(<vscale x 1 x i64> %va) {
;
; CHECK-F-LABEL: ctlz_nxv1i64:
; CHECK-F: # %bb.0:
-; CHECK-F-NEXT: li a0, 190
-; CHECK-F-NEXT: vsetvli a1, zero, e64, m1, ta, ma
-; CHECK-F-NEXT: vmv.v.x v9, a0
-; CHECK-F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-F-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-F-NEXT: fsrmi a0, 1
-; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8
-; CHECK-F-NEXT: vsrl.vi v8, v10, 23
-; CHECK-F-NEXT: vwsubu.wv v9, v9, v8
-; CHECK-F-NEXT: li a1, 64
+; CHECK-F-NEXT: vfncvt.f.xu.w v9, v8
+; CHECK-F-NEXT: vsrl.vi v8, v9, 23
; CHECK-F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-F-NEXT: vminu.vx v8, v9, a1
+; CHECK-F-NEXT: vzext.vf2 v9, v8
+; CHECK-F-NEXT: li a1, 190
+; CHECK-F-NEXT: vrsub.vx v8, v9, a1
+; CHECK-F-NEXT: li a1, 64
+; CHECK-F-NEXT: vminu.vx v8, v8, a1
; CHECK-F-NEXT: fsrm a0
; CHECK-F-NEXT: ret
;
@@ -1372,17 +1371,16 @@ define <vscale x 2 x i64> @ctlz_nxv2i64(<vscale x 2 x i64> %va) {
;
; CHECK-F-LABEL: ctlz_nxv2i64:
; CHECK-F: # %bb.0:
-; CHECK-F-NEXT: li a0, 190
-; CHECK-F-NEXT: vsetvli a1, zero, e64, m2, ta, ma
-; CHECK-F-NEXT: vmv.v.x v10, a0
-; CHECK-F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-F-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-F-NEXT: fsrmi a0, 1
-; CHECK-F-NEXT: vfncvt.f.xu.w v12, v8
-; CHECK-F-NEXT: vsrl.vi v8, v12, 23
-; CHECK-F-NEXT: vwsubu.wv v10, v10, v8
-; CHECK-F-NEXT: li a1, 64
+; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8
+; CHECK-F-NEXT: vsrl.vi v8, v10, 23
; CHECK-F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-F-NEXT: vminu.vx v8, v10, a1
+; CHECK-F-NEXT: vzext.vf2 v10, v8
+; CHECK-F-NEXT: li a1, 190
+; CHECK-F-NEXT: vrsub.vx v8, v10, a1
+; CHECK-F-NEXT: li a1, 64
+; CHECK-F-NEXT: vminu.vx v8, v8, a1
; CHECK-F-NEXT: fsrm a0
; CHECK-F-NEXT: ret
;
@@ -1513,17 +1511,16 @@ define <vscale x 4 x i64> @ctlz_nxv4i64(<vscale x 4 x i64> %va) {
;
; CHECK-F-LABEL: ctlz_nxv4i64:
; CHECK-F: # %bb.0:
-; CHECK-F-NEXT: li a0, 190
-; CHECK-F-NEXT: vsetvli a1, zero, e64, m4, ta, ma
-; CHECK-F-NEXT: vmv.v.x v12, a0
-; CHECK-F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-F-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-F-NEXT: fsrmi a0, 1
-; CHECK-F-NEXT: vfncvt.f.xu.w v16, v8
-; CHECK-F-NEXT: vsrl.vi v8, v16, 23
-; CHECK-F-NEXT: vwsubu.wv v12, v12, v8
-; CHECK-F-NEXT: li a1, 64
+; CHECK-F-NEXT: vfncvt.f.xu.w v12, v8
+; CHECK-F-NEXT: vsrl.vi v8, v12, 23
; CHECK-F-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-F-NEXT: vminu.vx v8, v12, a1
+; CHECK-F-NEXT: vzext.vf2 v12, v8
+; CHECK-F-NEXT: li a1, 190
+; CHECK-F-NEXT: vrsub.vx v8, v12, a1
+; CHECK-F-NEXT: li a1, 64
+; CHECK-F-NEXT: vminu.vx v8, v8, a1
; CHECK-F-NEXT: fsrm a0
; CHECK-F-NEXT: ret
;
@@ -1654,17 +1651,16 @@ define <vscale x 8 x i64> @ctlz_nxv8i64(<vscale x 8 x i64> %va) {
;
; CHECK-F-LABEL: ctlz_nxv8i64:
; CHECK-F: # %bb.0:
-; CHECK-F-NEXT: li a0, 190
-; CHECK-F-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; CHECK-F-NEXT: vmv.v.x v16, a0
-; CHECK-F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-F-NEXT: vsetvli a0, zero, e32, m4, ta, ma
; CHECK-F-NEXT: fsrmi a0, 1
-; CHECK-F-NEXT: vfncvt.f.xu.w v24, v8
-; CHECK-F-NEXT: vsrl.vi v8, v24, 23
-; CHECK-F-NEXT: vwsubu.wv v16, v16, v8
-; CHECK-F-NEXT: li a1, 64
+; CHECK-F-NEXT: vfncvt.f.xu.w v16, v8
+; CHECK-F-NEXT: vsrl.vi v8, v16, 23
; CHECK-F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-F-NEXT: vminu.vx v8, v16, a1
+; CHECK-F-NEXT: vzext.vf2 v16, v8
+; CHECK-F-NEXT: li a1, 190
+; CHECK-F-NEXT: vrsub.vx v8, v16, a1
+; CHECK-F-NEXT: li a1, 64
+; CHECK-F-NEXT: vminu.vx v8, v8, a1
; CHECK-F-NEXT: fsrm a0
; CHECK-F-NEXT: ret
;
@@ -2837,16 +2833,15 @@ define <vscale x 1 x i64> @ctlz_zero_undef_nxv1i64(<vscale x 1 x i64> %va) {
;
; CHECK-F-LABEL: ctlz_zero_undef_nxv1i64:
; CHECK-F: # %bb.0:
-; CHECK-F-NEXT: li a0, 190
-; CHECK-F-NEXT: vsetvli a1, zero, e64, m1, ta, ma
-; CHECK-F-NEXT: vmv.v.x v9, a0
-; CHECK-F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-F-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-F-NEXT: fsrmi a0, 1
-; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8
-; CHECK-F-NEXT: vsrl.vi v8, v10, 23
-; CHECK-F-NEXT: vwsubu.wv v9, v9, v8
+; CHECK-F-NEXT: vfncvt.f.xu.w v9, v8
+; CHECK-F-NEXT: vsrl.vi v8, v9, 23
+; CHECK-F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-F-NEXT: vzext.vf2 v9, v8
+; CHECK-F-NEXT: li a1, 190
+; CHECK-F-NEXT: vrsub.vx v8, v9, a1
; CHECK-F-NEXT: fsrm a0
-; CHECK-F-NEXT: vmv1r.v v8, v9
; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv1i64:
@@ -2973,16 +2968,15 @@ define <vscale x 2 x i64> @ctlz_zero_undef_nxv2i64(<vscale x 2 x i64> %va) {
;
; CHECK-F-LABEL: ctlz_zero_undef_nxv2i64:
; CHECK-F: # %bb.0:
-; CHECK-F-NEXT: li a0, 190
-; CHECK-F-NEXT: vsetvli a1, zero, e64, m2, ta, ma
-; CHECK-F-NEXT: vmv.v.x v10, a0
-; CHECK-F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-F-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-F-NEXT: fsrmi a0, 1
-; CHECK-F-NEXT: vfncvt.f.xu.w v12, v8
-; CHECK-F-NEXT: vsrl.vi v8, v12, 23
-; CHECK-F-NEXT: vwsubu.wv v10, v10, v8
+; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8
+; CHECK-F-NEXT: vsrl.vi v8, v10, 23
+; CHECK-F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-F-NEXT: vzext.vf2 v10, v8
+; CHECK-F-NEXT: li a1, 190
+; CHECK-F-NEXT: vrsub.vx v8, v10, a1
; CHECK-F-NEXT: fsrm a0
-; CHECK-F-NEXT: vmv2r.v v8, v10
; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv2i64:
@@ -3109,16 +3103,15 @@ define <vscale x 4 x i64> @ctlz_zero_undef_nxv4i64(<vscale x 4 x i64> %va) {
;
; CHECK-F-LABEL: ctlz_zero_undef_nxv4i64:
; CHECK-F: # %bb.0:
-; CHECK-F-NEXT: li a0, 190
-; CHECK-F-NEXT: vsetvli a1, zero, e64, m4, ta, ma
-; CHECK-F-NEXT: vmv.v.x v12, a0
-; CHECK-F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-F-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-F-NEXT: fsrmi a0, 1
-; CHECK-F-NEXT: vfncvt.f.xu.w v16, v8
-; CHECK-F-NEXT: vsrl.vi v8, v16, 23
-; CHECK-F-NEXT: vwsubu.wv v12, v12, v8
+; CHECK-F-NEXT: vfncvt.f.xu.w v12, v8
+; CHECK-F-NEXT: vsrl.vi v8, v12, 23
+; CHECK-F-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-F-NEXT: vzext.vf2 v12, v8
+; CHECK-F-NEXT: li a1, 190
+; CHECK-F-NEXT: vrsub.vx v8, v12, a1
; CHECK-F-NEXT: fsrm a0
-; CHECK-F-NEXT: vmv4r.v v8, v12
; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv4i64:
@@ -3245,15 +3238,14 @@ define <vscale x 8 x i64> @ctlz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
;
; CHECK-F-LABEL: ctlz_zero_undef_nxv8i64:
; CHECK-F: # %bb.0:
-; CHECK-F-NEXT: vmv8r.v v16, v8
-; CHECK-F-NEXT: li a0, 190
-; CHECK-F-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; CHECK-F-NEXT: vmv.v.x v8, a0
-; CHECK-F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-F-NEXT: vsetvli a0, zero, e32, m4, ta, ma
; CHECK-F-NEXT: fsrmi a0, 1
-; CHECK-F-NEXT: vfncvt.f.xu.w v24, v16
-; CHECK-F-NEXT: vsrl.vi v16, v24, 23
-; CHECK-F-NEXT: vwsubu.wv v8, v8, v16
+; CHECK-F-NEXT: vfncvt.f.xu.w v16, v8
+; CHECK-F-NEXT: vsrl.vi v8, v16, 23
+; CHECK-F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-F-NEXT: vzext.vf2 v16, v8
+; CHECK-F-NEXT: li a1, 190
+; CHECK-F-NEXT: vrsub.vx v8, v16, a1
; CHECK-F-NEXT: fsrm a0
; CHECK-F-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
index fe605d5..d99e3a7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
@@ -10,12 +10,14 @@
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=FOLDING
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=FOLDING
+; FIXME: We should use vwadd/vwsub/vwmul instructions.
; Check that the scalable vector add/sub/mul operations are all promoted into their
; vw counterpart when the folding of the web size is increased to 3.
; We need the web size to be at least 3 for the folding to happen, because
; %c has 3 uses.
; see https://github.com/llvm/llvm-project/pull/72340
+; FIXME: We don't currently use widening instructions.
define <vscale x 2 x i16> @vwop_vscale_sext_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_sext_multiple_users:
; NO_FOLDING: # %bb.0:
@@ -35,16 +37,18 @@ define <vscale x 2 x i16> @vwop_vscale_sext_multiple_users(ptr %x, ptr %y, ptr %
;
; FOLDING-LABEL: vwop_vscale_sext_multiple_users:
; FOLDING: # %bb.0:
-; FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; FOLDING-NEXT: vsetvli a3, zero, e16, mf2, ta, ma
; FOLDING-NEXT: vle8.v v8, (a0)
; FOLDING-NEXT: vle8.v v9, (a1)
; FOLDING-NEXT: vle8.v v10, (a2)
-; FOLDING-NEXT: vwmul.vv v11, v8, v9
-; FOLDING-NEXT: vwadd.vv v9, v8, v10
-; FOLDING-NEXT: vwsub.vv v12, v8, v10
-; FOLDING-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; FOLDING-NEXT: vor.vv v8, v11, v9
-; FOLDING-NEXT: vor.vv v8, v8, v12
+; FOLDING-NEXT: vsext.vf2 v11, v8
+; FOLDING-NEXT: vsext.vf2 v8, v9
+; FOLDING-NEXT: vsext.vf2 v9, v10
+; FOLDING-NEXT: vmul.vv v8, v11, v8
+; FOLDING-NEXT: vadd.vv v10, v11, v9
+; FOLDING-NEXT: vsub.vv v9, v11, v9
+; FOLDING-NEXT: vor.vv v8, v8, v10
+; FOLDING-NEXT: vor.vv v8, v8, v9
; FOLDING-NEXT: ret
%a = load <vscale x 2 x i8>, ptr %x
%b = load <vscale x 2 x i8>, ptr %y
@@ -81,16 +85,18 @@ define <vscale x 2 x i16> @vwop_vscale_zext_multiple_users(ptr %x, ptr %y, ptr %
;
; FOLDING-LABEL: vwop_vscale_zext_multiple_users:
; FOLDING: # %bb.0:
-; FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; FOLDING-NEXT: vsetvli a3, zero, e16, mf2, ta, ma
; FOLDING-NEXT: vle8.v v8, (a0)
; FOLDING-NEXT: vle8.v v9, (a1)
; FOLDING-NEXT: vle8.v v10, (a2)
-; FOLDING-NEXT: vwmulu.vv v11, v8, v9
-; FOLDING-NEXT: vwaddu.vv v9, v8, v10
-; FOLDING-NEXT: vwsubu.vv v12, v8, v10
-; FOLDING-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; FOLDING-NEXT: vor.vv v8, v11, v9
-; FOLDING-NEXT: vor.vv v8, v8, v12
+; FOLDING-NEXT: vzext.vf2 v11, v8
+; FOLDING-NEXT: vzext.vf2 v8, v9
+; FOLDING-NEXT: vzext.vf2 v9, v10
+; FOLDING-NEXT: vmul.vv v8, v11, v8
+; FOLDING-NEXT: vadd.vv v10, v11, v9
+; FOLDING-NEXT: vsub.vv v9, v11, v9
+; FOLDING-NEXT: vor.vv v8, v8, v10
+; FOLDING-NEXT: vor.vv v8, v8, v9
; FOLDING-NEXT: ret
%a = load <vscale x 2 x i8>, ptr %x
%b = load <vscale x 2 x i8>, ptr %y
diff --git a/llvm/test/CodeGen/X86/addsub-constant-folding.ll b/llvm/test/CodeGen/X86/addsub-constant-folding.ll
index de215c8..4dbaae5 100644
--- a/llvm/test/CodeGen/X86/addsub-constant-folding.ll
+++ b/llvm/test/CodeGen/X86/addsub-constant-folding.ll
@@ -1141,3 +1141,39 @@ define <4 x i32> @vec_const_sub_const_sub_nonsplat(<4 x i32> %arg) {
%t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
ret <4 x i32> %t1
}
+
+; (x|c1)+c2 where (x|c1) is addlike
+define i32 @add_const_disjoint_or_const(i32 %arg) {
+; X86-LABEL: add_const_disjoint_or_const:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl $10, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: add_const_disjoint_or_const:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal 10(%rdi), %eax
+; X64-NEXT: retq
+ %t0 = or disjoint i32 %arg, 8
+ %t1 = add i32 %t0, 2
+ ret i32 %t1
+}
+
+; (x+c1)|c2 where the outer or is addlike
+define i32 @disjoint_or_const_add_const(i32 %arg) {
+; X86-LABEL: disjoint_or_const_add_const:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl $10, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: disjoint_or_const_add_const:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal 10(%rdi), %eax
+; X64-NEXT: retq
+ %t0 = add i32 %arg, 8
+ %t1 = or disjoint i32 %t0, 2
+ ret i32 %t1
+}
diff --git a/llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll b/llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll
deleted file mode 100644
index a3c3fc7..0000000
--- a/llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll
+++ /dev/null
@@ -1,185 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=x86_64-grtev4-linux-gnu < %s | FileCheck %s
-
-%struct.wibble = type { %struct.wombat }
-%struct.wombat = type { %struct.ham, [3 x i8] }
-%struct.ham = type { %struct.zot }
-%struct.zot = type { %struct.blam }
-%struct.blam = type { %struct.ham.0 }
-%struct.ham.0 = type { %struct.bar }
-%struct.bar = type { %struct.bar.1 }
-%struct.bar.1 = type { %struct.baz, i8 }
-%struct.baz = type { %struct.snork }
-%struct.snork = type <{ %struct.spam, i8, [3 x i8] }>
-%struct.spam = type { %struct.snork.2, %struct.snork.2 }
-%struct.snork.2 = type { i32 }
-%struct.snork.3 = type { %struct.baz, i8, [3 x i8] }
-
-define void @foo(ptr %arg, ptr %arg1, i40 %arg2, ptr %arg3, i32 %arg4) #0 {
-; CHECK-LABEL: foo:
-; CHECK: # %bb.0: # %bb
-; CHECK-NEXT: pushq %rbp
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset %rbp, -16
-; CHECK-NEXT: movq %rsp, %rbp
-; CHECK-NEXT: .cfi_def_cfa_register %rbp
-; CHECK-NEXT: pushq %r15
-; CHECK-NEXT: pushq %r14
-; CHECK-NEXT: pushq %r13
-; CHECK-NEXT: pushq %r12
-; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_offset %rbx, -56
-; CHECK-NEXT: .cfi_offset %r12, -48
-; CHECK-NEXT: .cfi_offset %r13, -40
-; CHECK-NEXT: .cfi_offset %r14, -32
-; CHECK-NEXT: .cfi_offset %r15, -24
-; CHECK-NEXT: movl %r8d, %r14d
-; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: movq %rsi, %r13
-; CHECK-NEXT: movq %rdi, %r15
-; CHECK-NEXT: incl %r14d
-; CHECK-NEXT: xorl %ebx, %ebx
-; CHECK-NEXT: # implicit-def: $r12
-; CHECK-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: jmp .LBB0_3
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_1: # %bb17
-; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT: movq %r15, %r13
-; CHECK-NEXT: xorl %r15d, %r15d
-; CHECK-NEXT: testq %rbx, %rbx
-; CHECK-NEXT: sete %r15b
-; CHECK-NEXT: xorl %edi, %edi
-; CHECK-NEXT: callq _Znwm@PLT
-; CHECK-NEXT: shll $4, %r15d
-; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
-; CHECK-NEXT: movq %r12, %rcx
-; CHECK-NEXT: shrq $32, %rcx
-; CHECK-NEXT: movb %cl, 12(%rax)
-; CHECK-NEXT: movl %r12d, 8(%rax)
-; CHECK-NEXT: movq %r15, %rbx
-; CHECK-NEXT: movq %r13, %r15
-; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
-; CHECK-NEXT: decl %r14d
-; CHECK-NEXT: je .LBB0_8
-; CHECK-NEXT: .LBB0_3: # %bb7
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: callq widget@PLT
-; CHECK-NEXT: cmpb $-5, (%r13)
-; CHECK-NEXT: jae .LBB0_5
-; CHECK-NEXT: # %bb.4: # in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT: movl %r12d, %r12d
-; CHECK-NEXT: cmpq %r15, %rbx
-; CHECK-NEXT: jbe .LBB0_1
-; CHECK-NEXT: jmp .LBB0_7
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_5: # %bb12
-; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT: movq 0, %rax
-; CHECK-NEXT: movq 8, %rax
-; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
-; CHECK-NEXT: cmpq %r15, %rbx
-; CHECK-NEXT: jbe .LBB0_1
-; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: xorl %ebx, %ebx
-; CHECK-NEXT: decl %r14d
-; CHECK-NEXT: jne .LBB0_3
-; CHECK-NEXT: .LBB0_8: # %bb21
-; CHECK-NEXT: cmpb $0, 12(%rax)
-; CHECK-NEXT: jne .LBB0_10
-; CHECK-NEXT: # %bb.9: # %bb26
-; CHECK-NEXT: addq $24, %rsp
-; CHECK-NEXT: popq %rbx
-; CHECK-NEXT: popq %r12
-; CHECK-NEXT: popq %r13
-; CHECK-NEXT: popq %r14
-; CHECK-NEXT: popq %r15
-; CHECK-NEXT: popq %rbp
-; CHECK-NEXT: .cfi_def_cfa %rsp, 8
-; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB0_10: # %bb25
-; CHECK-NEXT: .cfi_def_cfa %rbp, 16
-; CHECK-NEXT: movq %r15, %rdi
-; CHECK-NEXT: callq pluto@PLT
-bb:
- br label %bb7
-
-bb5: ; preds = %bb17, %bb14
- %phi = phi ptr [ %call19, %bb17 ], [ null, %bb14 ]
- %phi6 = phi ptr [ %getelementptr, %bb17 ], [ null, %bb14 ]
- %add = add i32 %phi9, 1
- %icmp = icmp eq i32 %phi9, %arg4
- br i1 %icmp, label %bb21, label %bb7
-
-bb7: ; preds = %bb5, %bb
- %phi8 = phi ptr [ null, %bb ], [ %phi6, %bb5 ]
- %phi9 = phi i32 [ 0, %bb ], [ %add, %bb5 ]
- %phi10 = phi i40 [ undef, %bb ], [ %phi15, %bb5 ]
- %call = call ptr @widget()
- %load = load i8, ptr %arg1, align 8
- %icmp11 = icmp ult i8 %load, -5
- %and = and i40 %phi10, 4294967295
- br i1 %icmp11, label %bb14, label %bb12
-
-bb12: ; preds = %bb7
- %load13 = load volatile { i64, i64 }, ptr null, align 4294967296
- br label %bb14
-
-bb14: ; preds = %bb12, %bb7
- %phi15 = phi i40 [ %and, %bb7 ], [ %arg2, %bb12 ]
- %icmp16 = icmp ugt ptr %phi8, %arg
- br i1 %icmp16, label %bb5, label %bb17
-
-bb17: ; preds = %bb14
- %icmp18 = icmp eq ptr %phi8, null
- %zext = zext i1 %icmp18 to i64
- %call19 = call ptr @_Znwm(i64 0)
- %getelementptr = getelementptr %struct.wibble, ptr %arg3, i64 %zext
- %getelementptr20 = getelementptr i8, ptr %call19, i64 8
- store i40 %phi15, ptr %getelementptr20, align 4
- br label %bb5
-
-bb21: ; preds = %bb5
- %getelementptr22 = getelementptr %struct.snork.3, ptr %phi, i64 0, i32 1
- %load23 = load i8, ptr %getelementptr22, align 4
- %icmp24 = icmp eq i8 %load23, 0
- br i1 %icmp24, label %bb26, label %bb25
-
-bb25: ; preds = %bb21
- call void @pluto(ptr %arg)
- unreachable
-
-bb26: ; preds = %bb21
- ret void
-}
-
-define void @eggs(ptr %arg, ptr %arg1) {
-; CHECK-LABEL: eggs:
-; CHECK: # %bb.0: # %bb
-; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: movq %rsi, %rdi
-; CHECK-NEXT: movq %rax, %rsi
-; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: xorl %ecx, %ecx
-; CHECK-NEXT: xorl %r8d, %r8d
-; CHECK-NEXT: callq foo@PLT
-; CHECK-NEXT: popq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
-bb:
- call void @foo(ptr %arg1, ptr %arg, i40 0, ptr null, i32 0)
- ret void
-}
-
-declare ptr @widget()
-
-declare void @pluto(ptr)
-
-declare ptr @_Znwm(i64)
-
-attributes #0 = { noinline "frame-pointer"="all" }
diff --git a/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir b/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir
index 190b140..8241a17 100644
--- a/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir
+++ b/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir
@@ -9,7 +9,7 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x2aaaaaab), %bb.2(0x55555555)
; CHECK-NEXT: liveins: $edi
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]]
+ ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
; CHECK-NEXT: JCC_1 %bb.2, 5, implicit killed undef $eflags
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
@@ -28,7 +28,7 @@ body: |
; CHECK-NEXT: JCC_1 %bb.5, 5, implicit killed undef $eflags
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
- ; CHECK-NEXT: dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $al, implicit-def $al
+ ; CHECK-NEXT: dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $al
; CHECK-NEXT: RET 0, killed undef $al
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
diff --git a/llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir b/llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir
deleted file mode 100644
index fe53aef..0000000
--- a/llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir
+++ /dev/null
@@ -1,47 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
-# RUN: llc -mtriple=x86_64-- -run-pass=register-coalescer -enable-subreg-liveness -verify-coalescing -o - %s | FileCheck %s
-
-
-# FIXME: Need to handle subrange updates when coalescing with subreg_to_reg
-# This will fail if x86 enables subregister liveness.
----
-name: requires_new_subrange_coalesce_subreg_to_reg
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: requires_new_subrange_coalesce_subreg_to_reg
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; CHECK-NEXT: liveins: $eax
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef %a.sub_32bit:gr64_with_sub_8bit = COPY $eax
- ; CHECK-NEXT: %b:gr32 = IMPLICIT_DEF
- ; CHECK-NEXT: %c:gr64 = INSERT_SUBREG %a, %b, %subreg.sub_32bit
- ; CHECK-NEXT: JCC_1 %bb.2, 4, implicit undef $eflags
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: successors: %bb.2(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef %a.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
- ; CHECK-NEXT: %c.sub_32bit:gr64 = COPY %a
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: %c.sub_32bit:gr64 = SUBREG_TO_REG %a, %b, %subreg.sub_32bit
- ; CHECK-NEXT: RET 0, implicit %c
- bb.0:
- liveins: $eax
- %init_eax:gr32 = COPY $eax
- %a:gr64 = SUBREG_TO_REG 0, %init_eax, %subreg.sub_32bit
- %b:gr32 = IMPLICIT_DEF
- %c:gr64 = INSERT_SUBREG %a, %b, %subreg.sub_32bit
- JCC_1 %bb.2, 4, implicit undef $eflags
-
- bb.1:
- %imm0:gr32 = MOV32r0 implicit-def dead $eflags
- %a = SUBREG_TO_REG 0, %imm0, %subreg.sub_32bit
- %c.sub_32bit = COPY %a
-
- bb.2:
- %c.sub_32bit = SUBREG_TO_REG %a, %b, %subreg.sub_32bit
- RET 0, implicit %c
-
-...
diff --git a/llvm/test/CodeGen/X86/or-lea.ll b/llvm/test/CodeGen/X86/or-lea.ll
index ab9b917..616ab99 100644
--- a/llvm/test/CodeGen/X86/or-lea.ll
+++ b/llvm/test/CodeGen/X86/or-lea.ll
@@ -825,3 +825,23 @@ entry:
%or = or i64 %sub, 549755813889 ; 0x8000000001
ret i64 %or
}
+
+define i32 @or_shift1_disjoint(i32 %x, i32 %y) {
+; X86-LABEL: or_shift1_disjoint:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl %eax, %eax
+; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: or_shift1_disjoint:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal (%rsi,%rdi,2), %eax
+; X64-NEXT: retq
+ %shl = shl i32 %x, 1
+ %or = or disjoint i32 %y, %shl
+ ret i32 %or
+}
+
diff --git a/llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir b/llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir
deleted file mode 100644
index 6121a0b..0000000
--- a/llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir
+++ /dev/null
@@ -1,348 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
-# RUN: llc -mtriple=x86_64-- -run-pass=register-coalescer -o - %s | FileCheck %s
-
-# We cannot lose the liveness of the high subregister of %1 when
-# coalesced with %0, so introduce an implicit-def of the super
-# register on the MOV.
-
----
-name: coalesce_mov32r0_into_subreg_to_reg64
-tracksRegLiveness: true
-body: |
- bb.0:
- ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64
- ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: undef %1.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %1
- ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
- ; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
- ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: RET 0
- ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- %0:gr32 = MOV32r0 implicit-def dead $eflags
- %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
- $rdi = COPY %1
- CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
- ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- RET 0
-
-...
-
----
-name: subreg_to_reg_folds_to_undef
-tracksRegLiveness: true
-body: |
- bb.0:
- liveins: $rax
-
- ; CHECK-LABEL: name: subreg_to_reg_folds_to_undef
- ; CHECK: liveins: $rax
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64_with_sub_8bit = COPY $rax
- ; CHECK-NEXT: undef %4.sub_32bit:gr64_with_sub_8bit = MOV32rr [[COPY]].sub_32bit, implicit-def %4
- ; CHECK-NEXT: RET 0, implicit %4
- %0:gr64 = COPY killed $rax
- %1:gr32 = COPY killed %0.sub_32bit
- %2:gr32 = MOV32rr killed %1
- %3:gr64 = SUBREG_TO_REG 0, killed %2, %subreg.sub_32bit
- %4:gr64 = COPY killed %3
- RET 0, implicit %4
-
-...
-
----
-name: coalesce_mov32r0_subreg_def_into_subreg_to_reg64
-tracksRegLiveness: true
-body: |
- bb.0:
- ; CHECK-LABEL: name: coalesce_mov32r0_subreg_def_into_subreg_to_reg64
- ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: undef %1.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
- ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
- ; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
- ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: RET 0
- ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- undef %0.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
- %1:gr64 = SUBREG_TO_REG 0, killed %0.sub_32bit, %subreg.sub_32bit
- $rdi = COPY %1
- CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
- ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- RET 0
-
-...
-
----
-name: coalesce_mov32r0_into_subreg_def_with_super_def_to_reg64
-tracksRegLiveness: true
-body: |
- bb.0:
- ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_def_with_super_def_to_reg64
- ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: undef %1.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %1
- ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
- ; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
- ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: RET 0
- ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- undef %0.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %0
- %1:gr64 = SUBREG_TO_REG 0, killed %0.sub_32bit, %subreg.sub_32bit
- $rdi = COPY %1
- CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
- ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- RET 0
-
-...
-
----
-name: coalesce_mov32r0_into_subreg_to_reg64_already_defs_other_subreg
-tracksRegLiveness: true
-body: |
- bb.0:
- ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_already_defs_other_subreg
- ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: undef %1.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def undef %1.sub_8bit, implicit-def %1
- ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit %1
- ; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit undef $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
- ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: RET 0
- ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- %0:gr32 = MOV32r0 implicit-def dead $eflags, implicit-def undef %0.sub_8bit
- %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
- INLINEASM &"", 0, implicit %1
- CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit undef $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
- ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- RET 0
-
-...
-
-
-# Reduced realistic case which was asserting after introducing new implicit-defs
----
-name: coalesce_needs_implicit_defs
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: coalesce_needs_implicit_defs
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $rdi
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
- ; CHECK-NEXT: undef %2.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %2
- ; CHECK-NEXT: undef %3.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %3
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef %10.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
- ; CHECK-NEXT: TEST64rr %3, %3, implicit-def $eflags
- ; CHECK-NEXT: %10.sub_8bit:gr64_with_sub_8bit = SETCCr 4, implicit killed $eflags
- ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
- ; CHECK-NEXT: CALL64r %2, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
- ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: [[SHL64ri:%[0-9]+]]:gr64_with_sub_8bit = SHL64ri [[SHL64ri]], 4, implicit-def dead $eflags
- ; CHECK-NEXT: [[ADD64rr:%[0-9]+]]:gr64_with_sub_8bit = ADD64rr [[ADD64rr]], [[COPY]], implicit-def dead $eflags
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64_with_sub_8bit = COPY [[ADD64rr]]
- ; CHECK-NEXT: JMP_1 %bb.1
- bb.0:
- liveins: $rdi
-
- %0:gr64 = COPY killed $rdi
- %1:gr32 = MOV32r0 implicit-def dead $eflags
- %2:gr64 = SUBREG_TO_REG 0, %1, %subreg.sub_32bit
- %3:gr64 = COPY killed %2
-
- bb.1:
- %4:gr64 = COPY killed %3
- %5:gr32 = MOV32r0 implicit-def dead $eflags
- TEST64rr killed %4, %4, implicit-def $eflags
- %6:gr8 = SETCCr 4, implicit killed $eflags
- %7:gr32 = COPY killed %5
- %7.sub_8bit:gr32 = COPY killed %6
- %8:gr64 = SUBREG_TO_REG 0, killed %7, %subreg.sub_32bit
- ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- %9:gr64 = SUBREG_TO_REG 0, %1, %subreg.sub_32bit
- $rdi = COPY %9
- CALL64r killed %9, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
- ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- %10:gr64 = COPY killed %8
- %10:gr64 = SHL64ri %10, 4, implicit-def dead $eflags
- %11:gr64 = COPY killed %10
- %11:gr64 = ADD64rr %11, %0, implicit-def dead $eflags
- %3:gr64 = COPY killed %11
- JMP_1 %bb.1
-
-...
-
----
-name: coalesce_mov32r0_into_subreg_to_reg64_physreg_def
-tracksRegLiveness: true
-body: |
- bb.0:
- ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_physreg_def
- ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
- ; CHECK-NEXT: CALL64r killed $rdi, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
- ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: RET 0
- ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- %0:gr32 = MOV32r0 implicit-def dead $eflags
- $rdi = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
- CALL64r killed $rdi, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
- ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- RET 0
-
-...
-
----
-name: coalesce_mov32r0_into_subreg_to_reg64_physreg_use
-tracksRegLiveness: true
-body: |
- bb.0:
- liveins: $eax
- ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_physreg_use
- ; CHECK: liveins: $eax
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: $eax = MOV32r0 implicit-def dead $eflags
- ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, $eax, %subreg.sub_32bit
- ; CHECK-NEXT: $rdi = COPY [[SUBREG_TO_REG]]
- ; CHECK-NEXT: CALL64r [[SUBREG_TO_REG]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
- ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: RET 0
- ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- $eax = MOV32r0 implicit-def dead $eflags
- %1:gr64 = SUBREG_TO_REG 0, killed $eax, %subreg.sub_32bit
- $rdi = COPY %1
- CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
- ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- RET 0
-
-...
-
-# Coalesced instruction is a copy with other implicit operands
----
-name: coalesce_copy_into_subreg_to_reg64
-tracksRegLiveness: true
-body: |
- bb.0:
- liveins: $eax
- ; CHECK-LABEL: name: coalesce_copy_into_subreg_to_reg64
- ; CHECK: liveins: $eax
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: undef %1.sub_32bit:gr64_with_sub_8bit = COPY $eax, implicit-def dead $eflags, implicit-def %1
- ; CHECK-NEXT: $rdi = COPY %1
- ; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
- ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: RET 0
- ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- %0:gr32 = COPY $eax, implicit-def dead $eflags
- %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
- $rdi = COPY %1
- CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
- ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- RET 0
-
-...
-
----
-name: coalesce_mov32r0_into_subreg_to_reg64_multiple_redef_value
-tracksRegLiveness: true
-body: |
- bb.0:
- ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_multiple_redef_value
- ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: undef %1.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %1
- ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit-def %1.sub_32bit, implicit %1.sub_32bit
- ; CHECK-NEXT: $rdi = COPY %1
- ; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
- ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: RET 0
- ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- %0:gr32 = MOV32r0 implicit-def dead $eflags
- INLINEASM &"", 0, implicit-def %0, implicit %0
- %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
- $rdi = COPY %1
- CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
- ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- RET 0
-
-...
-
----
-name: coalesce_mov32r0_into_subreg_to_reg64_def_is_block_liveout
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_def_is_block_liveout
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit-def undef %1.sub_32bit, implicit-def %1
- ; CHECK-NEXT: JCC_1 %bb.1, 4, implicit undef $eflags
- ; CHECK-NEXT: JMP_1 %bb.2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: $rdi = COPY %1
- ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
- ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: RET 0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2:
- bb.0:
- INLINEASM &"", 0, implicit-def %0:gr32
- JCC_1 %bb.1, 4, implicit undef $eflags
- JMP_1 %bb.2
-
- bb.1:
- %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
- $rdi = COPY %1
- ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
- ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- RET 0
-
- bb.2:
-
-...
-
----
-name: coalesce_mov32r0_into_subreg_to_reg64_def_is_phi_def
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_def_is_phi_def
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit-def undef %1.sub_32bit, implicit-def %1
- ; CHECK-NEXT: JCC_1 %bb.1, 4, implicit undef $eflags
- ; CHECK-NEXT: JMP_1 %bb.2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $rdi = COPY %1
- ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
- ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: JMP_1 %bb.1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2:
- bb.0:
-
- INLINEASM &"", 0, implicit-def %0:gr32
- JCC_1 %bb.1, 4, implicit undef $eflags
- JMP_1 %bb.2
-
- bb.1:
- %1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub_32bit
- $rdi = COPY %1
- ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
- ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- JMP_1 %bb.1
-
- bb.2:
-
-...
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-4.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-4.ll
index 22e353f..44f2d58 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-4.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-4.ll
@@ -2258,7 +2258,6 @@ define void @load_i16_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm3[6,7]
; AVX512F-SLOW-NEXT: vpmovqw %zmm1, %xmm3
; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2
; AVX512F-SLOW-NEXT: vmovdqa 64(%rdi), %ymm3
; AVX512F-SLOW-NEXT: vpmovqw %ymm3, %xmm3
; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm10
@@ -2273,7 +2272,7 @@ define void @load_i16_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm10 = ymm10[0,1,2,3,4,5],ymm11[6,7]
; AVX512F-SLOW-NEXT: vpmovqw %zmm0, %xmm11
; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm10 = ymm11[0,1,2,3],ymm10[4,5,6,7]
-; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm10[0,1,2,3],zmm2[4,5,6,7]
+; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm10[0,1,2,3],zmm2[0,1,2,3]
; AVX512F-SLOW-NEXT: vmovdqa 64(%rdi), %xmm10
; AVX512F-SLOW-NEXT: vmovdqa 80(%rdi), %xmm11
; AVX512F-SLOW-NEXT: vmovdqa 192(%rdi), %xmm13
@@ -2292,7 +2291,6 @@ define void @load_i16_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX512F-SLOW-NEXT: vpsrlq $16, %zmm1, %zmm9
; AVX512F-SLOW-NEXT: vpmovqw %zmm9, %xmm9
; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm9[0,1,2,3],ymm5[4,5,6,7]
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm5
; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm8 = xmm8[0,1,1,3,4,5,6,7]
; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm9 = xmm12[0,1,1,3,4,5,6,7]
; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm8 = xmm9[0],xmm8[0],xmm9[1],xmm8[1]
@@ -2307,7 +2305,7 @@ define void @load_i16_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX512F-SLOW-NEXT: vpsrlq $16, %zmm0, %zmm9
; AVX512F-SLOW-NEXT: vpmovqw %zmm9, %xmm9
; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm8 = ymm9[0,1,2,3],ymm8[4,5,6,7]
-; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm5 = zmm8[0,1,2,3],zmm5[4,5,6,7]
+; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm5 = zmm8[0,1,2,3],zmm5[0,1,2,3]
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[3,1,2,3]
; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm8 = xmm6[0,1,2,0,4,5,6,7]
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm7 = xmm7[3,1,2,3]
@@ -2324,7 +2322,6 @@ define void @load_i16_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX512F-SLOW-NEXT: vpsrlq $32, %zmm1, %zmm13
; AVX512F-SLOW-NEXT: vpmovqw %zmm13, %xmm13
; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm12 = ymm13[0,1,2,3],ymm12[4,5,6,7]
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm12, %zmm0, %zmm12
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[3,1,2,3]
; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm13 = xmm3[0,1,2,0,4,5,6,7]
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[3,1,2,3]
@@ -2341,7 +2338,7 @@ define void @load_i16_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX512F-SLOW-NEXT: vpsrlq $32, %zmm0, %zmm14
; AVX512F-SLOW-NEXT: vpmovqw %zmm14, %xmm14
; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm13 = ymm14[0,1,2,3],ymm13[4,5,6,7]
-; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm12 = zmm13[0,1,2,3],zmm12[4,5,6,7]
+; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm12 = zmm13[0,1,2,3],zmm12[0,1,2,3]
; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm6 = xmm6[0,1,3,1,4,5,6,7]
; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm7 = xmm7[0,1,3,1,4,5,6,7]
; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm7[0],xmm6[0],xmm7[1],xmm6[1]
@@ -2354,7 +2351,6 @@ define void @load_i16_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX512F-SLOW-NEXT: vpsrlq $48, %zmm1, %zmm1
; AVX512F-SLOW-NEXT: vpmovqw %zmm1, %xmm1
; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm6[4,5,6,7]
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1
; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,1,3,1,4,5,6,7]
; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[0,1,3,1,4,5,6,7]
; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
@@ -2367,7 +2363,7 @@ define void @load_i16_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX512F-SLOW-NEXT: vpsrlq $48, %zmm0, %zmm0
; AVX512F-SLOW-NEXT: vpmovqw %zmm0, %xmm0
; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm3[4,5,6,7]
-; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[0,1,2,3]
; AVX512F-SLOW-NEXT: vmovdqa64 %zmm2, (%rsi)
; AVX512F-SLOW-NEXT: vmovdqa64 %zmm5, (%rdx)
; AVX512F-SLOW-NEXT: vmovdqa64 %zmm12, (%rcx)
@@ -2393,7 +2389,6 @@ define void @load_i16_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX512F-FAST-NEXT: vpermt2d %ymm7, %ymm11, %ymm10
; AVX512F-FAST-NEXT: vpmovqw %zmm1, %xmm7
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm7 = ymm7[0,1,2,3],ymm10[4,5,6,7]
-; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm7, %zmm0, %zmm7
; AVX512F-FAST-NEXT: vmovdqa 96(%rdi), %ymm10
; AVX512F-FAST-NEXT: vpermd %ymm10, %ymm4, %ymm12
; AVX512F-FAST-NEXT: vpshufb %ymm2, %ymm12, %ymm13
@@ -2403,7 +2398,7 @@ define void @load_i16_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX512F-FAST-NEXT: vpermt2d %ymm13, %ymm11, %ymm4
; AVX512F-FAST-NEXT: vpmovqw %zmm0, %xmm13
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm4 = ymm13[0,1,2,3],ymm4[4,5,6,7]
-; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm7 = zmm4[0,1,2,3],zmm7[4,5,6,7]
+; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm7 = zmm4[0,1,2,3],zmm7[0,1,2,3]
; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} ymm4 = [18,19,22,23,26,27,30,31,18,19,22,23,26,27,30,31,18,19,22,23,26,27,30,31,18,19,22,23,26,27,30,31]
; AVX512F-FAST-NEXT: vpshufb %ymm4, %ymm5, %ymm13
; AVX512F-FAST-NEXT: vmovdqa {{.*#+}} ymm5 = <2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15,18,19,22,23,26,27,30,31,u,u,u,u,u,u,u,u>
@@ -2412,14 +2407,13 @@ define void @load_i16_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX512F-FAST-NEXT: vpsrlq $16, %zmm1, %zmm13
; AVX512F-FAST-NEXT: vpmovqw %zmm13, %xmm13
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm9 = ymm13[0,1,2,3],ymm9[4,5,6,7]
-; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm9, %zmm0, %zmm9
; AVX512F-FAST-NEXT: vpshufb %ymm4, %ymm12, %ymm12
; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm15, %ymm13
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm12 = ymm13[0,1,2,3,4,5],ymm12[6,7]
; AVX512F-FAST-NEXT: vpsrlq $16, %zmm0, %zmm13
; AVX512F-FAST-NEXT: vpmovqw %zmm13, %xmm13
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm12 = ymm13[0,1,2,3],ymm12[4,5,6,7]
-; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm9 = zmm12[0,1,2,3],zmm9[4,5,6,7]
+; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm9 = zmm12[0,1,2,3],zmm9[0,1,2,3]
; AVX512F-FAST-NEXT: vmovdqa {{.*#+}} ymm12 = [1,3,2,3,1,3,5,7]
; AVX512F-FAST-NEXT: vpermd %ymm6, %ymm12, %ymm6
; AVX512F-FAST-NEXT: vpshufb %ymm2, %ymm6, %ymm13
@@ -2429,7 +2423,6 @@ define void @load_i16_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX512F-FAST-NEXT: vpsrlq $32, %zmm1, %zmm13
; AVX512F-FAST-NEXT: vpmovqw %zmm13, %xmm13
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm13 = ymm13[0,1,2,3],ymm15[4,5,6,7]
-; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm13, %zmm0, %zmm13
; AVX512F-FAST-NEXT: vpermd %ymm10, %ymm12, %ymm10
; AVX512F-FAST-NEXT: vpshufb %ymm2, %ymm10, %ymm2
; AVX512F-FAST-NEXT: vpermd %ymm14, %ymm12, %ymm12
@@ -2438,21 +2431,20 @@ define void @load_i16_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX512F-FAST-NEXT: vpsrlq $32, %zmm0, %zmm2
; AVX512F-FAST-NEXT: vpmovqw %zmm2, %xmm2
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
-; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm13[4,5,6,7]
+; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm13[0,1,2,3]
; AVX512F-FAST-NEXT: vpshufb %ymm4, %ymm6, %ymm3
; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm8, %ymm6
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm3 = ymm6[0,1,2,3,4,5],ymm3[6,7]
; AVX512F-FAST-NEXT: vpsrlq $48, %zmm1, %zmm1
; AVX512F-FAST-NEXT: vpmovqw %zmm1, %xmm1
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7]
-; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1
; AVX512F-FAST-NEXT: vpshufb %ymm4, %ymm10, %ymm3
; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm12, %ymm4
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3,4,5],ymm3[6,7]
; AVX512F-FAST-NEXT: vpsrlq $48, %zmm0, %zmm0
; AVX512F-FAST-NEXT: vpmovqw %zmm0, %xmm0
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm3[4,5,6,7]
-; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[0,1,2,3]
; AVX512F-FAST-NEXT: vmovdqa64 %zmm7, (%rsi)
; AVX512F-FAST-NEXT: vmovdqa64 %zmm9, (%rdx)
; AVX512F-FAST-NEXT: vmovdqa64 %zmm2, (%rcx)
@@ -4909,285 +4901,276 @@ define void @load_i16_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
;
; AVX512F-SLOW-LABEL: load_i16_stride4_vf64:
; AVX512F-SLOW: # %bb.0:
-; AVX512F-SLOW-NEXT: subq $104, %rsp
-; AVX512F-SLOW-NEXT: vmovdqa 240(%rdi), %xmm0
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm10 = xmm0[0,2,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm10[0,1,0,2,4,5,6,7]
-; AVX512F-SLOW-NEXT: vmovdqa 224(%rdi), %xmm2
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[0,2,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm5[0,1,0,2,4,5,6,7]
+; AVX512F-SLOW-NEXT: subq $200, %rsp
+; AVX512F-SLOW-NEXT: vmovdqa64 256(%rdi), %zmm26
+; AVX512F-SLOW-NEXT: vmovdqa64 384(%rdi), %zmm27
+; AVX512F-SLOW-NEXT: vmovdqa64 (%rdi), %zmm28
+; AVX512F-SLOW-NEXT: vmovdqa64 128(%rdi), %zmm29
+; AVX512F-SLOW-NEXT: vmovdqa 192(%rdi), %ymm0
+; AVX512F-SLOW-NEXT: vpmovqw %ymm0, %xmm0
+; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX512F-SLOW-NEXT: vmovdqa 240(%rdi), %xmm14
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm9 = xmm14[0,2,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm9[0,1,0,2,4,5,6,7]
+; AVX512F-SLOW-NEXT: vmovdqa 224(%rdi), %xmm13
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm6 = xmm13[0,2,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm6[0,1,0,2,4,5,6,7]
; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
-; AVX512F-SLOW-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX512F-SLOW-NEXT: vmovdqa64 112(%rdi), %xmm20
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm14 = xmm20[0,2,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm14[0,1,0,2,4,5,6,7]
-; AVX512F-SLOW-NEXT: vmovdqa 96(%rdi), %xmm3
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm7 = xmm3[0,2,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm7[0,1,0,2,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
-; AVX512F-SLOW-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX512F-SLOW-NEXT: vmovdqa64 496(%rdi), %xmm19
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm15 = xmm19[0,2,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm15[0,1,0,2,4,5,6,7]
-; AVX512F-SLOW-NEXT: vmovdqa64 480(%rdi), %xmm21
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm8 = xmm21[0,2,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm8[0,1,0,2,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
-; AVX512F-SLOW-NEXT: vmovdqa %xmm1, (%rsp) # 16-byte Spill
-; AVX512F-SLOW-NEXT: vmovdqa64 368(%rdi), %xmm17
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm17[0,2,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm6 = xmm4[0,1,0,2,4,5,6,7]
-; AVX512F-SLOW-NEXT: vmovdqa64 352(%rdi), %xmm24
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm24[0,2,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm9 = xmm1[0,1,0,2,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm18 = xmm9[0],xmm6[0],xmm9[1],xmm6[1]
-; AVX512F-SLOW-NEXT: vmovdqa64 320(%rdi), %xmm25
-; AVX512F-SLOW-NEXT: vmovdqa64 336(%rdi), %xmm29
-; AVX512F-SLOW-NEXT: vmovdqa 448(%rdi), %xmm11
-; AVX512F-SLOW-NEXT: vmovdqa64 464(%rdi), %xmm16
-; AVX512F-SLOW-NEXT: vmovdqa 64(%rdi), %xmm9
-; AVX512F-SLOW-NEXT: vmovdqa 80(%rdi), %xmm12
-; AVX512F-SLOW-NEXT: vmovdqa 192(%rdi), %xmm13
-; AVX512F-SLOW-NEXT: vmovdqa 208(%rdi), %xmm6
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm10 = xmm10[0,1,1,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm5[0,1,1,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm5[0],xmm10[0],xmm5[1],xmm10[1]
-; AVX512F-SLOW-NEXT: vmovdqa %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm5 = xmm6[0,2,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm5[1,3,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm10 = xmm13[0,2,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm10 = xmm10[1,3,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm10[0],xmm5[0],xmm10[1],xmm5[1]
-; AVX512F-SLOW-NEXT: vmovdqa %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm14[0,1,1,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm7 = xmm7[0,1,1,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm7[0],xmm5[0],xmm7[1],xmm5[1]
-; AVX512F-SLOW-NEXT: vmovdqa %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm5 = xmm12[0,2,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm5[1,3,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm7 = xmm9[0,2,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm7 = xmm7[1,3,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm27 = xmm7[0],xmm5[0],xmm7[1],xmm5[1]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm15[0,1,1,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm7 = xmm8[0,1,1,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm7[0],xmm5[0],xmm7[1],xmm5[1]
-; AVX512F-SLOW-NEXT: vmovdqa %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm5 = xmm16[0,2,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm5[1,3,2,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
+; AVX512F-SLOW-NEXT: vpmovqw %zmm29, %xmm1
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-SLOW-NEXT: vmovdqa 64(%rdi), %ymm1
+; AVX512F-SLOW-NEXT: vpmovqw %ymm1, %xmm1
+; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
+; AVX512F-SLOW-NEXT: vmovdqa 112(%rdi), %xmm12
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm3 = xmm12[0,2,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm3[0,1,0,2,4,5,6,7]
+; AVX512F-SLOW-NEXT: vmovdqa 96(%rdi), %xmm11
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm7 = xmm11[0,2,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm7[0,1,0,2,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm5[0],xmm4[0],xmm5[1],xmm4[1]
+; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm4[6,7]
+; AVX512F-SLOW-NEXT: vpmovqw %zmm28, %xmm4
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
+; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[0,1,2,3]
+; AVX512F-SLOW-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; AVX512F-SLOW-NEXT: vmovdqa 448(%rdi), %ymm0
+; AVX512F-SLOW-NEXT: vpmovqw %ymm0, %xmm0
+; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX512F-SLOW-NEXT: vmovdqa64 496(%rdi), %xmm24
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm24[0,2,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm4[0,1,0,2,4,5,6,7]
+; AVX512F-SLOW-NEXT: vmovdqa64 480(%rdi), %xmm23
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm15 = xmm23[0,2,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm15[0,1,0,2,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm5[0],xmm1[0],xmm5[1],xmm1[1]
+; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
+; AVX512F-SLOW-NEXT: vpmovqw %zmm27, %xmm1
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-SLOW-NEXT: vmovdqa 320(%rdi), %ymm1
+; AVX512F-SLOW-NEXT: vpmovqw %ymm1, %xmm1
+; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
+; AVX512F-SLOW-NEXT: vmovdqa64 368(%rdi), %xmm31
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm5 = xmm31[0,2,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm5[0,1,0,2,4,5,6,7]
+; AVX512F-SLOW-NEXT: vmovdqa64 352(%rdi), %xmm25
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm8 = xmm25[0,2,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm10 = xmm8[0,1,0,2,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm10[0],xmm0[0],xmm10[1],xmm0[1]
+; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
+; AVX512F-SLOW-NEXT: vpmovqw %zmm26, %xmm1
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm2[0,1,2,3]
+; AVX512F-SLOW-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; AVX512F-SLOW-NEXT: vmovdqa64 320(%rdi), %xmm30
+; AVX512F-SLOW-NEXT: vmovdqa64 336(%rdi), %xmm17
+; AVX512F-SLOW-NEXT: vmovdqa64 448(%rdi), %xmm18
+; AVX512F-SLOW-NEXT: vmovdqa64 464(%rdi), %xmm19
+; AVX512F-SLOW-NEXT: vmovdqa64 64(%rdi), %xmm20
+; AVX512F-SLOW-NEXT: vmovdqa64 80(%rdi), %xmm21
+; AVX512F-SLOW-NEXT: vmovdqa 192(%rdi), %xmm0
+; AVX512F-SLOW-NEXT: vmovdqa 208(%rdi), %xmm1
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm9[0,1,1,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm6 = xmm6[0,1,1,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm6[0],xmm2[0],xmm6[1],xmm2[1]
+; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm6 = xmm1[0,2,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm6 = xmm6[1,3,2,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm9 = xmm0[0,2,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm9 = xmm9[1,3,2,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm9[0],xmm6[0],xmm9[1],xmm6[1]
+; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm6
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3,4,5],ymm2[6,7]
+; AVX512F-SLOW-NEXT: vpsrlq $16, %zmm29, %zmm6
+; AVX512F-SLOW-NEXT: vpmovqw %zmm6, %xmm6
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3],ymm2[4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,1,1,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm6 = xmm7[0,1,1,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm6[0],xmm3[0],xmm6[1],xmm3[1]
+; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm6 = xmm21[0,2,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm6 = xmm6[1,3,2,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm7 = xmm20[0,2,2,3]
; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm7 = xmm7[1,3,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm22 = xmm7[0],xmm5[0],xmm7[1],xmm5[1]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[0,1,1,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,1,1,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm26 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm29[0,2,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,3,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm25[0,2,2,3]
+; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm7[0],xmm6[0],xmm7[1],xmm6[1]
+; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm6
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm3 = ymm6[0,1,2,3,4,5],ymm3[6,7]
+; AVX512F-SLOW-NEXT: vpsrlq $16, %zmm28, %zmm6
+; AVX512F-SLOW-NEXT: vpmovqw %zmm6, %xmm6
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm3 = ymm6[0,1,2,3],ymm3[4,5,6,7]
+; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[0,1,2,3]
+; AVX512F-SLOW-NEXT: vmovdqu64 %zmm2, (%rsp) # 64-byte Spill
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm4[0,1,1,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm15[0,1,1,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm3 = xmm19[0,2,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[1,3,2,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm18[0,2,2,3]
; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[1,3,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm30 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm10 = xmm0[3,1,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm10[0,1,2,0,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm14 = xmm2[3,1,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm14[0,1,2,0,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm28 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[3,1,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm6[2,0,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm13[3,1,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm2[2,0,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm31 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm20[3,1,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,1,2,0,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[3,1,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm3[0,1,2,0,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm23 = xmm4[0],xmm0[0],xmm4[1],xmm0[1]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm15 = xmm12[3,1,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm15[2,0,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm9 = xmm9[3,1,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm9[2,0,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm20 = xmm4[0],xmm0[0],xmm4[1],xmm0[1]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm12 = xmm19[3,1,2,3]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm13 = xmm21[3,1,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm12[0,1,2,0,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
+; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3,4,5],ymm2[6,7]
+; AVX512F-SLOW-NEXT: vpsrlq $16, %zmm27, %zmm3
+; AVX512F-SLOW-NEXT: vpmovqw %zmm3, %xmm3
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm5[0,1,1,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm8[0,1,1,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
+; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm17[0,2,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[1,3,2,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm5 = xmm30[0,2,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm5[1,3,2,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm5[0],xmm4[0],xmm5[1],xmm4[1]
+; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3,4,5],ymm3[6,7]
+; AVX512F-SLOW-NEXT: vpsrlq $16, %zmm26, %zmm4
+; AVX512F-SLOW-NEXT: vpmovqw %zmm4, %xmm4
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[0,1,2,3]
+; AVX512F-SLOW-NEXT: vmovdqu64 %zmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm14[3,1,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm2[0,1,2,0,4,5,6,7]
+; AVX512F-SLOW-NEXT: vmovdqa64 %xmm2, %xmm22
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm13 = xmm13[3,1,2,3]
; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm13[0,1,2,0,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm21 = xmm4[0],xmm0[0],xmm4[1],xmm0[1]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm7 = xmm16[3,1,2,3]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm11 = xmm11[3,1,2,3]
+; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
+; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm6 = xmm1[3,1,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm6[2,0,2,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm7 = xmm0[3,1,2,3]
; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm7[2,0,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm11[2,0,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm19 = xmm4[0],xmm0[0],xmm4[1],xmm0[1]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm17[3,1,2,3]
-; AVX512F-SLOW-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm24[3,1,2,3]
-; AVX512F-SLOW-NEXT: vmovdqa %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,0,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm4[0,1,2,0,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm17 = xmm5[0],xmm0[0],xmm5[1],xmm0[1]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm29[3,1,2,3]
-; AVX512F-SLOW-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm25[3,1,2,3]
-; AVX512F-SLOW-NEXT: vmovdqa %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm4[2,0,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm16 = xmm5[0],xmm0[0],xmm5[1],xmm0[1]
-; AVX512F-SLOW-NEXT: vmovdqa 192(%rdi), %ymm0
-; AVX512F-SLOW-NEXT: vpmovqw %ymm0, %xmm0
+; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512F-SLOW-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm5 # 16-byte Folded Reload
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm0[0,1,2,3,4,5],ymm5[6,7]
-; AVX512F-SLOW-NEXT: vmovdqa64 128(%rdi), %zmm0
-; AVX512F-SLOW-NEXT: vpmovqw %zmm0, %xmm4
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm4 = ymm4[0,1,2,3],ymm5[4,5,6,7]
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm4
-; AVX512F-SLOW-NEXT: vmovdqa 64(%rdi), %ymm5
-; AVX512F-SLOW-NEXT: vpmovqw %ymm5, %xmm5
-; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm5
-; AVX512F-SLOW-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm8 # 16-byte Folded Reload
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm5[0,1,2,3,4,5],ymm8[6,7]
-; AVX512F-SLOW-NEXT: vmovdqa64 (%rdi), %zmm29
-; AVX512F-SLOW-NEXT: vpmovqw %zmm29, %xmm8
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm8[0,1,2,3],ymm5[4,5,6,7]
-; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm4 = zmm5[0,1,2,3],zmm4[4,5,6,7]
-; AVX512F-SLOW-NEXT: vmovdqu64 %zmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; AVX512F-SLOW-NEXT: vmovdqa 448(%rdi), %ymm4
-; AVX512F-SLOW-NEXT: vpmovqw %ymm4, %xmm4
-; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4
-; AVX512F-SLOW-NEXT: vinserti128 $1, (%rsp), %ymm0, %ymm5 # 16-byte Folded Reload
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm4 = ymm4[0,1,2,3,4,5],ymm5[6,7]
-; AVX512F-SLOW-NEXT: vmovdqa64 384(%rdi), %zmm25
-; AVX512F-SLOW-NEXT: vpmovqw %zmm25, %xmm5
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm4 = ymm5[0,1,2,3],ymm4[4,5,6,7]
-; AVX512F-SLOW-NEXT: vmovdqa 320(%rdi), %ymm5
-; AVX512F-SLOW-NEXT: vpmovqw %ymm5, %xmm5
-; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm5
-; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm18, %ymm0, %ymm8
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm5[0,1,2,3,4,5],ymm8[6,7]
-; AVX512F-SLOW-NEXT: vmovdqa64 256(%rdi), %zmm18
-; AVX512F-SLOW-NEXT: vpmovqw %zmm18, %xmm8
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm8[0,1,2,3],ymm5[4,5,6,7]
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm4
-; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm24 = zmm5[0,1,2,3],zmm4[4,5,6,7]
-; AVX512F-SLOW-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm4 # 16-byte Folded Reload
-; AVX512F-SLOW-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm5 # 16-byte Folded Reload
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm4 = ymm5[0,1,2,3,4,5],ymm4[6,7]
-; AVX512F-SLOW-NEXT: vpsrlq $16, %zmm0, %zmm5
-; AVX512F-SLOW-NEXT: vpmovqw %zmm5, %xmm5
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm4 = ymm5[0,1,2,3],ymm4[4,5,6,7]
-; AVX512F-SLOW-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm5 # 16-byte Folded Reload
-; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm27, %ymm0, %ymm8
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm8[0,1,2,3,4,5],ymm5[6,7]
-; AVX512F-SLOW-NEXT: vpsrlq $16, %zmm29, %zmm8
-; AVX512F-SLOW-NEXT: vpmovqw %zmm8, %xmm8
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm8[0,1,2,3],ymm5[4,5,6,7]
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm4
-; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm27 = zmm5[0,1,2,3],zmm4[4,5,6,7]
-; AVX512F-SLOW-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm4 # 16-byte Folded Reload
-; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm22, %ymm0, %ymm5
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm4 = ymm5[0,1,2,3,4,5],ymm4[6,7]
-; AVX512F-SLOW-NEXT: vpsrlq $16, %zmm25, %zmm5
-; AVX512F-SLOW-NEXT: vpmovqw %zmm5, %xmm5
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm4 = ymm5[0,1,2,3],ymm4[4,5,6,7]
-; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm26, %ymm0, %ymm5
-; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm30, %ymm0, %ymm8
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm8[0,1,2,3,4,5],ymm5[6,7]
-; AVX512F-SLOW-NEXT: vpsrlq $16, %zmm18, %zmm8
-; AVX512F-SLOW-NEXT: vpmovqw %zmm8, %xmm8
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm8[0,1,2,3],ymm5[4,5,6,7]
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm4
-; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm22 = zmm5[0,1,2,3],zmm4[4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm10[0,1,3,1,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm14[0,1,3,1,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm3[6,7]
+; AVX512F-SLOW-NEXT: vpsrlq $32, %zmm29, %zmm1
+; AVX512F-SLOW-NEXT: vpmovqw %zmm1, %xmm1
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm12[3,1,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm2[0,1,2,0,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm3 = xmm11[3,1,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm3[0,1,2,0,4,5,6,7]
; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm5[0],xmm4[0],xmm5[1],xmm4[1]
-; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm28, %ymm0, %ymm5
-; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm31, %ymm0, %ymm8
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm8[0,1,2,3,4,5],ymm5[6,7]
-; AVX512F-SLOW-NEXT: vpsrlq $32, %zmm0, %zmm8
-; AVX512F-SLOW-NEXT: vpmovqw %zmm8, %xmm8
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm8[0,1,2,3],ymm5[4,5,6,7]
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm5
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm6 = xmm6[3,1,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[3,1,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm2[0],xmm6[0],xmm2[1],xmm6[1]
-; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm23, %ymm0, %ymm2
-; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm20, %ymm0, %ymm8
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm8[0,1,2,3,4,5],ymm2[6,7]
-; AVX512F-SLOW-NEXT: vpsrlq $32, %zmm29, %zmm8
-; AVX512F-SLOW-NEXT: vpmovqw %zmm8, %xmm8
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm8[0,1,2,3],ymm2[4,5,6,7]
-; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm5[4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,1,3,1,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,1,3,1,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
-; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm21, %ymm0, %ymm3
-; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm19, %ymm0, %ymm5
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm3 = ymm5[0,1,2,3,4,5],ymm3[6,7]
-; AVX512F-SLOW-NEXT: vpsrlq $32, %zmm25, %zmm5
-; AVX512F-SLOW-NEXT: vpmovqw %zmm5, %xmm5
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm3 = ymm5[0,1,2,3],ymm3[4,5,6,7]
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm3
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm15[3,1,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm8 = xmm9[3,1,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm8[0],xmm5[0],xmm8[1],xmm5[1]
-; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm17, %ymm0, %ymm8
-; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm16, %ymm0, %ymm9
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm8 = ymm9[0,1,2,3,4,5],ymm8[6,7]
-; AVX512F-SLOW-NEXT: vpsrlq $32, %zmm18, %zmm9
-; AVX512F-SLOW-NEXT: vpmovqw %zmm9, %xmm9
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm8 = ymm9[0,1,2,3],ymm8[4,5,6,7]
-; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm3 = zmm8[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm8 = xmm12[0,1,3,1,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm9 = xmm13[0,1,3,1,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm8 = xmm9[0],xmm8[0],xmm9[1],xmm8[1]
; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4
-; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm6
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm4 = ymm6[0,1,2,3,4,5],ymm4[6,7]
-; AVX512F-SLOW-NEXT: vpsrlq $48, %zmm0, %zmm0
-; AVX512F-SLOW-NEXT: vpmovqw %zmm0, %xmm0
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm4[4,5,6,7]
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm7[3,1,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm6 = xmm11[3,1,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm6[0],xmm4[0],xmm6[1],xmm4[1]
-; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm5
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm5[0,1,2,3,4,5],ymm1[6,7]
-; AVX512F-SLOW-NEXT: vpsrlq $48, %zmm29, %zmm5
-; AVX512F-SLOW-NEXT: vpmovqw %zmm5, %xmm5
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm5[0,1,2,3],ymm1[4,5,6,7]
-; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshuflw $116, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; AVX512F-SLOW-NEXT: # xmm1 = mem[0,1,3,1,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshuflw $116, {{[-0-9]+}}(%r{{[sb]}}p), %xmm5 # 16-byte Folded Reload
-; AVX512F-SLOW-NEXT: # xmm5 = mem[0,1,3,1,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm5[0],xmm1[0],xmm5[1],xmm1[1]
-; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm8, %ymm0, %ymm5
-; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm4 = ymm4[0,1,2,3,4,5],ymm5[6,7]
-; AVX512F-SLOW-NEXT: vpsrlq $48, %zmm25, %zmm5
-; AVX512F-SLOW-NEXT: vpmovqw %zmm5, %xmm5
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm4 = ymm5[0,1,2,3],ymm4[4,5,6,7]
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm4
-; AVX512F-SLOW-NEXT: vpshuflw $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm5 # 16-byte Folded Reload
-; AVX512F-SLOW-NEXT: # xmm5 = mem[3,1,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshuflw $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Folded Reload
-; AVX512F-SLOW-NEXT: # xmm6 = mem[3,1,2,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm21[3,1,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm8 = xmm0[2,0,2,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vmovdqa64 %xmm0, %xmm21
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm20[3,1,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm9 = xmm0[2,0,2,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vmovdqa64 %xmm0, %xmm16
+; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm8 = xmm9[0],xmm8[0],xmm9[1],xmm8[1]
+; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm8, %ymm0, %ymm8
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm4 = ymm8[0,1,2,3,4,5],ymm4[6,7]
+; AVX512F-SLOW-NEXT: vpsrlq $32, %zmm28, %zmm8
+; AVX512F-SLOW-NEXT: vpmovqw %zmm8, %xmm8
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm4 = ymm8[0,1,2,3],ymm4[4,5,6,7]
+; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm4[0,1,2,3],zmm1[0,1,2,3]
+; AVX512F-SLOW-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm10 = xmm24[3,1,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm10[0,1,2,0,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm11 = xmm23[3,1,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm11[0,1,2,0,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm20 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm12 = xmm19[3,1,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm12[2,0,2,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm18[3,1,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm8 = xmm1[2,0,2,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm18 = xmm8[0],xmm4[0],xmm8[1],xmm4[1]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm31[3,1,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm8 = xmm4[0,1,2,0,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm15 = xmm25[3,1,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm9 = xmm15[0,1,2,0,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm19 = xmm9[0],xmm8[0],xmm9[1],xmm8[1]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm8 = xmm17[3,1,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm8[2,0,2,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm9 = xmm30[3,1,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm14 = xmm9[2,0,2,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm14[0],xmm0[0],xmm14[1],xmm0[1]
+; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm20, %ymm0, %ymm14
+; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm18, %ymm0, %ymm5
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm5[0,1,2,3,4,5],ymm14[6,7]
+; AVX512F-SLOW-NEXT: vpsrlq $32, %zmm27, %zmm14
+; AVX512F-SLOW-NEXT: vpmovqw %zmm14, %xmm14
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm14[0,1,2,3],ymm5[4,5,6,7]
+; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm19, %ymm0, %ymm14
+; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm14[6,7]
+; AVX512F-SLOW-NEXT: vpsrlq $32, %zmm26, %zmm14
+; AVX512F-SLOW-NEXT: vpmovqw %zmm14, %xmm14
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm14[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm17 = zmm0[0,1,2,3],zmm5[0,1,2,3]
+; AVX512F-SLOW-NEXT: vmovdqa64 %xmm22, %xmm0
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,3,1,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm13[0,1,3,1,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm5[0],xmm0[0],xmm5[1],xmm0[1]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm6[3,1,2,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm6 = xmm7[3,1,2,3,4,5,6,7]
; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm6[0],xmm5[0],xmm6[1],xmm5[1]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,3,1,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,1,3,1,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm3
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3,4,5],ymm0[6,7]
+; AVX512F-SLOW-NEXT: vpsrlq $48, %zmm29, %zmm3
+; AVX512F-SLOW-NEXT: vpmovqw %zmm3, %xmm3
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-SLOW-NEXT: vmovdqa64 %xmm21, %xmm3
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[3,1,2,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vmovdqa64 %xmm16, %xmm5
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm5[3,1,2,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
+; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
+; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3,4,5],ymm2[6,7]
+; AVX512F-SLOW-NEXT: vpsrlq $48, %zmm28, %zmm3
+; AVX512F-SLOW-NEXT: vpmovqw %zmm3, %xmm3
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
+; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm2[0,1,2,3],zmm0[0,1,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm10[0,1,3,1,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm11[0,1,3,1,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm12[3,1,2,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm4[0,1,3,1,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm15[0,1,3,1,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
+; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm5
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm5[0,1,2,3,4,5],ymm1[6,7]
-; AVX512F-SLOW-NEXT: vpsrlq $48, %zmm18, %zmm5
-; AVX512F-SLOW-NEXT: vpmovqw %zmm5, %xmm5
-; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm5[0,1,2,3],ymm1[4,5,6,7]
-; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm4[4,5,6,7]
-; AVX512F-SLOW-NEXT: vmovdqa64 %zmm24, 64(%rsi)
-; AVX512F-SLOW-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm4 # 64-byte Reload
-; AVX512F-SLOW-NEXT: vmovaps %zmm4, (%rsi)
-; AVX512F-SLOW-NEXT: vmovdqa64 %zmm22, 64(%rdx)
-; AVX512F-SLOW-NEXT: vmovdqa64 %zmm27, (%rdx)
-; AVX512F-SLOW-NEXT: vmovdqa64 %zmm3, 64(%rcx)
-; AVX512F-SLOW-NEXT: vmovdqa64 %zmm2, (%rcx)
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm2[6,7]
+; AVX512F-SLOW-NEXT: vpsrlq $48, %zmm27, %zmm2
+; AVX512F-SLOW-NEXT: vpmovqw %zmm2, %xmm2
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm8[3,1,2,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm9[3,1,2,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
+; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3
+; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm3[6,7]
+; AVX512F-SLOW-NEXT: vpsrlq $48, %zmm26, %zmm3
+; AVX512F-SLOW-NEXT: vpmovqw %zmm3, %xmm3
+; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
+; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm2[0,1,2,3],zmm1[0,1,2,3]
+; AVX512F-SLOW-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload
+; AVX512F-SLOW-NEXT: vmovaps %zmm2, 64(%rsi)
+; AVX512F-SLOW-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload
+; AVX512F-SLOW-NEXT: vmovaps %zmm2, (%rsi)
+; AVX512F-SLOW-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload
+; AVX512F-SLOW-NEXT: vmovaps %zmm2, 64(%rdx)
+; AVX512F-SLOW-NEXT: vmovups (%rsp), %zmm2 # 64-byte Reload
+; AVX512F-SLOW-NEXT: vmovaps %zmm2, (%rdx)
+; AVX512F-SLOW-NEXT: vmovdqa64 %zmm17, 64(%rcx)
+; AVX512F-SLOW-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload
+; AVX512F-SLOW-NEXT: vmovaps %zmm2, (%rcx)
; AVX512F-SLOW-NEXT: vmovdqa64 %zmm1, 64(%r8)
; AVX512F-SLOW-NEXT: vmovdqa64 %zmm0, (%r8)
-; AVX512F-SLOW-NEXT: addq $104, %rsp
+; AVX512F-SLOW-NEXT: addq $200, %rsp
; AVX512F-SLOW-NEXT: vzeroupper
; AVX512F-SLOW-NEXT: retq
;
@@ -5211,7 +5194,6 @@ define void @load_i16_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX512F-FAST-NEXT: vpermt2d %ymm0, %ymm7, %ymm3
; AVX512F-FAST-NEXT: vpmovqw %zmm4, %xmm0
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm3[4,5,6,7]
-; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-FAST-NEXT: vmovdqa64 96(%rdi), %ymm27
; AVX512F-FAST-NEXT: vpermd %ymm27, %ymm1, %ymm3
; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm3, %ymm9
@@ -5221,7 +5203,7 @@ define void @load_i16_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX512F-FAST-NEXT: vpermt2d %ymm9, %ymm7, %ymm12
; AVX512F-FAST-NEXT: vpmovqw %zmm30, %xmm9
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm9 = ymm9[0,1,2,3],ymm12[4,5,6,7]
-; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm21 = zmm9[0,1,2,3],zmm0[4,5,6,7]
+; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm21 = zmm9[0,1,2,3],zmm0[0,1,2,3]
; AVX512F-FAST-NEXT: vmovdqa64 480(%rdi), %ymm16
; AVX512F-FAST-NEXT: vpermd %ymm16, %ymm1, %ymm0
; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm0, %ymm9
@@ -5231,7 +5213,6 @@ define void @load_i16_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX512F-FAST-NEXT: vpermt2d %ymm9, %ymm7, %ymm13
; AVX512F-FAST-NEXT: vpmovqw %zmm26, %xmm9
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm9 = ymm9[0,1,2,3],ymm13[4,5,6,7]
-; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm9, %zmm0, %zmm9
; AVX512F-FAST-NEXT: vmovdqa64 352(%rdi), %ymm18
; AVX512F-FAST-NEXT: vpermd %ymm18, %ymm1, %ymm13
; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm13, %ymm14
@@ -5241,7 +5222,7 @@ define void @load_i16_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX512F-FAST-NEXT: vpermt2d %ymm14, %ymm7, %ymm15
; AVX512F-FAST-NEXT: vpmovqw %zmm23, %xmm14
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm14 = ymm14[0,1,2,3],ymm15[4,5,6,7]
-; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm22 = zmm14[0,1,2,3],zmm9[4,5,6,7]
+; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm22 = zmm14[0,1,2,3],zmm9[0,1,2,3]
; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} ymm9 = [18,19,22,23,26,27,30,31,18,19,22,23,26,27,30,31,18,19,22,23,26,27,30,31,18,19,22,23,26,27,30,31]
; AVX512F-FAST-NEXT: vpshufb %ymm9, %ymm10, %ymm14
; AVX512F-FAST-NEXT: vmovdqa {{.*#+}} ymm10 = <2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15,18,19,22,23,26,27,30,31,u,u,u,u,u,u,u,u>
@@ -5250,28 +5231,26 @@ define void @load_i16_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX512F-FAST-NEXT: vpsrlq $16, %zmm4, %zmm14
; AVX512F-FAST-NEXT: vpmovqw %zmm14, %xmm14
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm11 = ymm14[0,1,2,3],ymm11[4,5,6,7]
-; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm11, %zmm0, %zmm11
; AVX512F-FAST-NEXT: vpshufb %ymm9, %ymm3, %ymm3
; AVX512F-FAST-NEXT: vpshufb %ymm10, %ymm8, %ymm8
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm3 = ymm8[0,1,2,3,4,5],ymm3[6,7]
; AVX512F-FAST-NEXT: vpsrlq $16, %zmm30, %zmm8
; AVX512F-FAST-NEXT: vpmovqw %zmm8, %xmm8
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm3 = ymm8[0,1,2,3],ymm3[4,5,6,7]
-; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm29 = zmm3[0,1,2,3],zmm11[4,5,6,7]
+; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm29 = zmm3[0,1,2,3],zmm11[0,1,2,3]
; AVX512F-FAST-NEXT: vpshufb %ymm9, %ymm0, %ymm0
; AVX512F-FAST-NEXT: vpshufb %ymm10, %ymm12, %ymm3
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3,4,5],ymm0[6,7]
; AVX512F-FAST-NEXT: vpsrlq $16, %zmm26, %zmm3
; AVX512F-FAST-NEXT: vpmovqw %zmm3, %xmm3
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7]
-; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-FAST-NEXT: vpshufb %ymm9, %ymm13, %ymm3
; AVX512F-FAST-NEXT: vpshufb %ymm10, %ymm1, %ymm1
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm3[6,7]
; AVX512F-FAST-NEXT: vpsrlq $16, %zmm23, %zmm3
; AVX512F-FAST-NEXT: vpmovqw %zmm3, %xmm3
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
-; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm19 = zmm1[0,1,2,3],zmm0[4,5,6,7]
+; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm19 = zmm1[0,1,2,3],zmm0[0,1,2,3]
; AVX512F-FAST-NEXT: vmovdqa {{.*#+}} ymm15 = [1,3,2,3,1,3,5,7]
; AVX512F-FAST-NEXT: vpermd %ymm24, %ymm15, %ymm3
; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm3, %ymm0
@@ -5280,8 +5259,7 @@ define void @load_i16_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX512F-FAST-NEXT: vpermt2d %ymm0, %ymm7, %ymm1
; AVX512F-FAST-NEXT: vpsrlq $32, %zmm4, %zmm0
; AVX512F-FAST-NEXT: vpmovqw %zmm0, %xmm0
-; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm1
+; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-FAST-NEXT: vpermd %ymm27, %ymm15, %ymm0
; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm0, %ymm13
; AVX512F-FAST-NEXT: vpermd %ymm28, %ymm15, %ymm12
@@ -5290,7 +5268,7 @@ define void @load_i16_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX512F-FAST-NEXT: vpsrlq $32, %zmm30, %zmm13
; AVX512F-FAST-NEXT: vpmovqw %zmm13, %xmm13
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm13 = ymm13[0,1,2,3],ymm14[4,5,6,7]
-; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm24 = zmm13[0,1,2,3],zmm1[4,5,6,7]
+; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm24 = zmm13[0,1,2,3],zmm1[0,1,2,3]
; AVX512F-FAST-NEXT: vpermd %ymm16, %ymm15, %ymm13
; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm13, %ymm1
; AVX512F-FAST-NEXT: vpermd %ymm17, %ymm15, %ymm14
@@ -5298,8 +5276,7 @@ define void @load_i16_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX512F-FAST-NEXT: vpermt2d %ymm1, %ymm7, %ymm11
; AVX512F-FAST-NEXT: vpsrlq $32, %zmm26, %zmm1
; AVX512F-FAST-NEXT: vpmovqw %zmm1, %xmm1
-; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm11[4,5,6,7]
-; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm11
+; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm11 = ymm1[0,1,2,3],ymm11[4,5,6,7]
; AVX512F-FAST-NEXT: vpermd %ymm18, %ymm15, %ymm1
; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm1, %ymm2
; AVX512F-FAST-NEXT: vpermd %ymm20, %ymm15, %ymm5
@@ -5308,35 +5285,33 @@ define void @load_i16_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX512F-FAST-NEXT: vpsrlq $32, %zmm23, %zmm2
; AVX512F-FAST-NEXT: vpmovqw %zmm2, %xmm2
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm6[4,5,6,7]
-; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm11[4,5,6,7]
+; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm11[0,1,2,3]
; AVX512F-FAST-NEXT: vpshufb %ymm9, %ymm3, %ymm3
; AVX512F-FAST-NEXT: vpshufb %ymm10, %ymm8, %ymm6
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm3 = ymm6[0,1,2,3,4,5],ymm3[6,7]
; AVX512F-FAST-NEXT: vpsrlq $48, %zmm4, %zmm4
; AVX512F-FAST-NEXT: vpmovqw %zmm4, %xmm4
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
-; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm3
; AVX512F-FAST-NEXT: vpshufb %ymm9, %ymm0, %ymm0
; AVX512F-FAST-NEXT: vpshufb %ymm10, %ymm12, %ymm4
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm4[0,1,2,3,4,5],ymm0[6,7]
; AVX512F-FAST-NEXT: vpsrlq $48, %zmm30, %zmm4
; AVX512F-FAST-NEXT: vpmovqw %zmm4, %xmm4
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm4[0,1,2,3],ymm0[4,5,6,7]
-; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm3[4,5,6,7]
+; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm3[0,1,2,3]
; AVX512F-FAST-NEXT: vpshufb %ymm9, %ymm13, %ymm3
; AVX512F-FAST-NEXT: vpshufb %ymm10, %ymm14, %ymm4
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3,4,5],ymm3[6,7]
; AVX512F-FAST-NEXT: vpsrlq $48, %zmm26, %zmm4
; AVX512F-FAST-NEXT: vpmovqw %zmm4, %xmm4
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
-; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm3
; AVX512F-FAST-NEXT: vpshufb %ymm9, %ymm1, %ymm1
; AVX512F-FAST-NEXT: vpshufb %ymm10, %ymm5, %ymm4
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm4[0,1,2,3,4,5],ymm1[6,7]
; AVX512F-FAST-NEXT: vpsrlq $48, %zmm23, %zmm4
; AVX512F-FAST-NEXT: vpmovqw %zmm4, %xmm4
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
-; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm3[4,5,6,7]
+; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm3[0,1,2,3]
; AVX512F-FAST-NEXT: vmovdqa64 %zmm22, 64(%rsi)
; AVX512F-FAST-NEXT: vmovdqa64 %zmm21, (%rsi)
; AVX512F-FAST-NEXT: vmovdqa64 %zmm19, 64(%rdx)
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-4.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-4.ll
index e2195f1..e231124 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-4.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-4.ll
@@ -1943,8 +1943,7 @@ define void @load_i8_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [0,4,0,4,0,4,8,12]
; AVX512F-NEXT: vpermt2d %ymm5, %ymm1, %ymm6
; AVX512F-NEXT: vpmovdb %zmm2, %xmm5
-; AVX512F-NEXT: vpblendd {{.*#+}} ymm5 = ymm5[0,1,2,3],ymm6[4,5,6,7]
-; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm8
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm8 = ymm5[0,1,2,3],ymm6[4,5,6,7]
; AVX512F-NEXT: vmovdqa 96(%rdi), %ymm5
; AVX512F-NEXT: vpshufb %ymm7, %ymm5, %ymm9
; AVX512F-NEXT: vmovdqa 64(%rdi), %ymm6
@@ -1952,7 +1951,7 @@ define void @load_i8_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512F-NEXT: vpermt2d %ymm9, %ymm1, %ymm7
; AVX512F-NEXT: vpmovdb %zmm0, %xmm9
; AVX512F-NEXT: vpblendd {{.*#+}} ymm7 = ymm9[0,1,2,3],ymm7[4,5,6,7]
-; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm7 = zmm7[0,1,2,3],zmm8[4,5,6,7]
+; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm7 = zmm7[0,1,2,3],zmm8[0,1,2,3]
; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm8 = [1,5,9,13,1,5,9,13,1,5,9,13,1,5,9,13,1,5,9,13,1,5,9,13,1,5,9,13,1,5,9,13]
; AVX512F-NEXT: vpshufb %ymm8, %ymm3, %ymm9
; AVX512F-NEXT: vpshufb %ymm8, %ymm4, %ymm10
@@ -1960,14 +1959,13 @@ define void @load_i8_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512F-NEXT: vpsrld $8, %zmm2, %zmm9
; AVX512F-NEXT: vpmovdb %zmm9, %xmm9
; AVX512F-NEXT: vpblendd {{.*#+}} ymm9 = ymm9[0,1,2,3],ymm10[4,5,6,7]
-; AVX512F-NEXT: vinserti64x4 $1, %ymm9, %zmm0, %zmm9
; AVX512F-NEXT: vpshufb %ymm8, %ymm5, %ymm10
; AVX512F-NEXT: vpshufb %ymm8, %ymm6, %ymm8
; AVX512F-NEXT: vpermt2d %ymm10, %ymm1, %ymm8
; AVX512F-NEXT: vpsrld $8, %zmm0, %zmm10
; AVX512F-NEXT: vpmovdb %zmm10, %xmm10
; AVX512F-NEXT: vpblendd {{.*#+}} ymm8 = ymm10[0,1,2,3],ymm8[4,5,6,7]
-; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm8 = zmm8[0,1,2,3],zmm9[4,5,6,7]
+; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm8 = zmm8[0,1,2,3],zmm9[0,1,2,3]
; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm9 = [2,6,10,14,2,6,10,14,2,6,10,14,2,6,10,14,2,6,10,14,2,6,10,14,2,6,10,14,2,6,10,14]
; AVX512F-NEXT: vpshufb %ymm9, %ymm3, %ymm10
; AVX512F-NEXT: vpshufb %ymm9, %ymm4, %ymm11
@@ -1975,14 +1973,13 @@ define void @load_i8_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512F-NEXT: vpsrld $16, %zmm2, %zmm10
; AVX512F-NEXT: vpmovdb %zmm10, %xmm10
; AVX512F-NEXT: vpblendd {{.*#+}} ymm10 = ymm10[0,1,2,3],ymm11[4,5,6,7]
-; AVX512F-NEXT: vinserti64x4 $1, %ymm10, %zmm0, %zmm10
; AVX512F-NEXT: vpshufb %ymm9, %ymm5, %ymm11
; AVX512F-NEXT: vpshufb %ymm9, %ymm6, %ymm9
; AVX512F-NEXT: vpermt2d %ymm11, %ymm1, %ymm9
; AVX512F-NEXT: vpsrld $16, %zmm0, %zmm11
; AVX512F-NEXT: vpmovdb %zmm11, %xmm11
; AVX512F-NEXT: vpblendd {{.*#+}} ymm9 = ymm11[0,1,2,3],ymm9[4,5,6,7]
-; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm9 = zmm9[0,1,2,3],zmm10[4,5,6,7]
+; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm9 = zmm9[0,1,2,3],zmm10[0,1,2,3]
; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm10 = [3,7,11,15,3,7,11,15,3,7,11,15,3,7,11,15,3,7,11,15,3,7,11,15,3,7,11,15,3,7,11,15]
; AVX512F-NEXT: vpshufb %ymm10, %ymm3, %ymm3
; AVX512F-NEXT: vpshufb %ymm10, %ymm4, %ymm4
@@ -1990,14 +1987,13 @@ define void @load_i8_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512F-NEXT: vpsrld $24, %zmm2, %zmm2
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm4[4,5,6,7]
-; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2
; AVX512F-NEXT: vpshufb %ymm10, %ymm5, %ymm3
; AVX512F-NEXT: vpshufb %ymm10, %ymm6, %ymm4
; AVX512F-NEXT: vpermt2d %ymm3, %ymm1, %ymm4
; AVX512F-NEXT: vpsrld $24, %zmm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm4[4,5,6,7]
-; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm2[4,5,6,7]
+; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm2[0,1,2,3]
; AVX512F-NEXT: vmovdqa64 %zmm7, (%rsi)
; AVX512F-NEXT: vmovdqa64 %zmm8, (%rdx)
; AVX512F-NEXT: vmovdqa64 %zmm9, (%rcx)
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll
index d253dd1..f3ec442 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll
@@ -5100,7 +5100,7 @@ define void @store_i16_stride7_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm6[0,1,2,3,5,4,6,7,8,9,10,11,13,12,14,15]
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm3[2,2,2,2,6,6,6,6]
-; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm10 = ymm1[0],ymm0[1],ymm1[2,3],ymm0[4],ymm1[5,6,7,8],ymm0[9],ymm1[10,11],ymm0[12],ymm1[13,14,15]
+; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm11 = ymm1[0],ymm0[1],ymm1[2,3],ymm0[4],ymm1[5,6,7,8],ymm0[9],ymm1[10,11],ymm0[12],ymm1[13,14,15]
; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm6[0,1,2,3,7,6,6,7,8,9,10,11,15,14,14,15]
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm3[3,3,3,3,7,7,7,7]
@@ -5139,8 +5139,8 @@ define void @store_i16_stride7_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512F-SLOW-NEXT: vmovdqa64 %xmm20, %xmm0
; AVX512F-SLOW-NEXT: vpshufb %xmm0, %xmm9, %xmm0
; AVX512F-SLOW-NEXT: vmovdqa 32(%rdx), %xmm15
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm11 = xmm15[1,1,2,2]
-; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm11[0],xmm0[1],xmm11[2,3],xmm0[4],xmm11[5,6],xmm0[7]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm10 = xmm15[1,1,2,2]
+; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm10[0],xmm0[1],xmm10[2,3],xmm0[4],xmm10[5,6],xmm0[7]
; AVX512F-SLOW-NEXT: vmovdqa64 %ymm0, %ymm26
; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm15[0],xmm9[0],xmm15[1],xmm9[1],xmm15[2],xmm9[2],xmm15[3],xmm9[3]
; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm9 = xmm15[4],xmm9[4],xmm15[5],xmm9[5],xmm15[6],xmm9[6],xmm15[7],xmm9[7]
@@ -5165,13 +5165,13 @@ define void @store_i16_stride7_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512F-SLOW-NEXT: vmovdqa 32(%rax), %ymm6
; AVX512F-SLOW-NEXT: vpermd %zmm6, %zmm3, %zmm3
; AVX512F-SLOW-NEXT: vmovdqa64 %ymm28, %ymm7
-; AVX512F-SLOW-NEXT: vpshufb %ymm7, %ymm6, %ymm11
+; AVX512F-SLOW-NEXT: vpshufb %ymm7, %ymm6, %ymm10
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm6 = ymm6[0,1,1,3,4,5,5,7]
; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm6 = ymm6[0,2,2,3]
; AVX512F-SLOW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm6, %ymm6
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm6, %zmm11, %zmm6
+; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm6, %zmm10, %zmm6
; AVX512F-SLOW-NEXT: vmovdqa {{.*#+}} xmm7 = [0,1,2,3,4,5,4,5,6,7,10,11,8,9,10,11]
-; AVX512F-SLOW-NEXT: vpshufb %xmm7, %xmm2, %xmm11
+; AVX512F-SLOW-NEXT: vpshufb %xmm7, %xmm2, %xmm10
; AVX512F-SLOW-NEXT: vpshufb %xmm7, %xmm1, %xmm1
; AVX512F-SLOW-NEXT: vmovdqa (%rdi), %xmm13
; AVX512F-SLOW-NEXT: vmovdqa (%rsi), %xmm14
@@ -5195,10 +5195,10 @@ define void @store_i16_stride7_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm15 = xmm15[2,1,2,3,4,5,6,7]
; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm15 = xmm15[0,1,2,3,4,5,5,4]
; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm15 = ymm15[0,0,1,3]
-; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm11 = ymm11[0,0,1,1]
+; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm10 = ymm10[0,0,1,1]
; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm28 = ymm30[2,2,2,3]
; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm29 = ymm23[0,2,2,3]
-; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm10 = ymm10[0,2,2,3]
+; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm11 = ymm11[0,2,2,3]
; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm7 = ymm19[2,1,3,3]
; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm30 = ymm18[0,2,2,3]
; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,3,2,4,5,6,7]
@@ -5217,21 +5217,21 @@ define void @store_i16_stride7_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm12 = ymm12[2,2,2,2,6,6,6,6]
; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm5 = ymm5[0],ymm12[1],ymm5[2,3],ymm12[4],ymm5[5,6,7,8],ymm12[9],ymm5[10,11],ymm12[12],ymm5[13,14,15]
; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm29, %zmm28, %zmm12
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm7, %zmm10, %zmm7
-; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm10 = [65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535]
-; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm12, %zmm10, %zmm7
+; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm7, %zmm11, %zmm7
+; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm11 = [65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535]
+; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm12, %zmm11, %zmm7
; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm0, %zmm30, %zmm0
; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm1, %zmm31, %zmm1
-; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm0, %zmm10, %zmm1
+; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm0, %zmm11, %zmm1
; AVX512F-SLOW-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm9, %zmm0 # 32-byte Folded Reload
; AVX512F-SLOW-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm15, %zmm9 # 32-byte Folded Reload
-; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm10 = [65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535]
-; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm0, %zmm10, %zmm9
+; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm11 = [65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535]
+; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm0, %zmm11, %zmm9
; AVX512F-SLOW-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload
-; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm11[0,1,2,3],zmm0[4,5,6,7]
-; AVX512F-SLOW-NEXT: vpermq $182, {{[-0-9]+}}(%r{{[sb]}}p), %ymm11 # 32-byte Folded Reload
-; AVX512F-SLOW-NEXT: # ymm11 = mem[2,1,3,2]
+; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm10[0,1,2,3],zmm0[4,5,6,7]
+; AVX512F-SLOW-NEXT: vpermq $182, {{[-0-9]+}}(%r{{[sb]}}p), %ymm10 # 32-byte Folded Reload
+; AVX512F-SLOW-NEXT: # ymm10 = mem[2,1,3,2]
; AVX512F-SLOW-NEXT: vpermq $234, {{[-0-9]+}}(%r{{[sb]}}p), %ymm12 # 32-byte Folded Reload
; AVX512F-SLOW-NEXT: # ymm12 = mem[2,2,2,3]
; AVX512F-SLOW-NEXT: vpshufd $254, {{[-0-9]+}}(%r{{[sb]}}p), %ymm15 # 32-byte Folded Reload
@@ -5258,7 +5258,7 @@ define void @store_i16_stride7_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm16 # 64-byte Reload
; AVX512F-SLOW-NEXT: vpternlogq $236, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm16, %zmm0
; AVX512F-SLOW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm9, %zmm0
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm12, %zmm11, %zmm9
+; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm12, %zmm10, %zmm9
; AVX512F-SLOW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm7, %zmm9
; AVX512F-SLOW-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm9, %zmm3
; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm7 = ymm15[2,1,3,2]
@@ -5269,7 +5269,7 @@ define void @store_i16_stride7_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512F-SLOW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm7
; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm8, %zmm17, %zmm1
; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm13, %zmm18, %zmm8
-; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm1, %zmm10, %zmm8
+; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm1, %zmm11, %zmm8
; AVX512F-SLOW-NEXT: vpbroadcastd 36(%rax), %ymm1
; AVX512F-SLOW-NEXT: vpbroadcastd 40(%rax), %ymm9
; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm9, %zmm1, %zmm1
@@ -5281,9 +5281,8 @@ define void @store_i16_stride7_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm9 # 64-byte Reload
; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm28, %zmm9, %zmm9
; AVX512F-SLOW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm8, %zmm9
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm29, %zmm0, %zmm8
-; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm10 # 64-byte Reload
-; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm8 = zmm10[0,1,2,3],zmm8[4,5,6,7]
+; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm8 # 64-byte Reload
+; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm8 = zmm8[0,1,2,3],zmm29[0,1,2,3]
; AVX512F-SLOW-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm8, %zmm6
; AVX512F-SLOW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm9, %zmm6
; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm31, %zmm30, %zmm8
@@ -10685,14 +10684,14 @@ define void @store_i16_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
;
; AVX512F-SLOW-LABEL: store_i16_stride7_vf64:
; AVX512F-SLOW: # %bb.0:
-; AVX512F-SLOW-NEXT: subq $2200, %rsp # imm = 0x898
+; AVX512F-SLOW-NEXT: subq $2168, %rsp # imm = 0x878
; AVX512F-SLOW-NEXT: vmovdqa 96(%rcx), %ymm2
; AVX512F-SLOW-NEXT: vmovdqa 96(%rdx), %ymm9
; AVX512F-SLOW-NEXT: vmovdqa 96(%rdi), %ymm7
; AVX512F-SLOW-NEXT: vmovdqa 96(%rsi), %ymm8
; AVX512F-SLOW-NEXT: vmovdqa {{.*#+}} ymm0 = [128,128,128,128,128,128,128,128,14,15,128,128,128,128,128,128,128,128,128,128,128,128,16,17,128,128,128,128,128,128,128,128]
; AVX512F-SLOW-NEXT: vpshufb %ymm0, %ymm2, %ymm1
-; AVX512F-SLOW-NEXT: vmovdqa64 %ymm2, %ymm19
+; AVX512F-SLOW-NEXT: vmovdqa64 %ymm2, %ymm18
; AVX512F-SLOW-NEXT: vmovdqa {{.*#+}} ymm11 = <u,u,u,u,12,13,14,15,128,128,u,u,u,u,u,u,u,u,u,u,16,17,128,128,u,u,u,u,u,u,u,u>
; AVX512F-SLOW-NEXT: vpshufb %ymm11, %ymm9, %ymm2
; AVX512F-SLOW-NEXT: vporq %ymm1, %ymm2, %ymm16
@@ -10705,16 +10704,16 @@ define void @store_i16_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512F-SLOW-NEXT: vmovdqa 64(%r9), %ymm2
; AVX512F-SLOW-NEXT: vpshufb %ymm3, %ymm2, %ymm1
; AVX512F-SLOW-NEXT: vmovdqa %ymm3, %ymm10
-; AVX512F-SLOW-NEXT: vmovdqa64 %ymm2, %ymm20
+; AVX512F-SLOW-NEXT: vmovdqa64 %ymm2, %ymm21
; AVX512F-SLOW-NEXT: vmovdqa 64(%r8), %ymm3
; AVX512F-SLOW-NEXT: vmovdqa {{.*#+}} ymm15 = <u,u,u,u,u,u,u,u,12,13,14,15,128,128,u,u,u,u,u,u,u,u,u,u,16,17,128,128,u,u,u,u>
; AVX512F-SLOW-NEXT: vpshufb %ymm15, %ymm3, %ymm2
-; AVX512F-SLOW-NEXT: vmovdqa64 %ymm3, %ymm24
+; AVX512F-SLOW-NEXT: vmovdqa64 %ymm3, %ymm20
; AVX512F-SLOW-NEXT: vpor %ymm1, %ymm2, %ymm1
; AVX512F-SLOW-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVX512F-SLOW-NEXT: vmovdqa 64(%rcx), %ymm2
; AVX512F-SLOW-NEXT: vpshufb %ymm0, %ymm2, %ymm1
-; AVX512F-SLOW-NEXT: vmovdqa64 %ymm2, %ymm18
+; AVX512F-SLOW-NEXT: vmovdqa64 %ymm2, %ymm23
; AVX512F-SLOW-NEXT: vmovdqa 64(%rdx), %ymm6
; AVX512F-SLOW-NEXT: vpshufb %ymm11, %ymm6, %ymm2
; AVX512F-SLOW-NEXT: vpor %ymm1, %ymm2, %ymm1
@@ -10744,10 +10743,10 @@ define void @store_i16_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512F-SLOW-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVX512F-SLOW-NEXT: vmovdqa (%rsi), %ymm2
; AVX512F-SLOW-NEXT: vpshufb %ymm12, %ymm2, %ymm1
-; AVX512F-SLOW-NEXT: vmovdqa64 %ymm2, %ymm23
+; AVX512F-SLOW-NEXT: vmovdqa64 %ymm2, %ymm28
; AVX512F-SLOW-NEXT: vmovdqa (%rdi), %ymm10
; AVX512F-SLOW-NEXT: vpshufb %ymm13, %ymm10, %ymm2
-; AVX512F-SLOW-NEXT: vmovdqa64 %ymm10, %ymm25
+; AVX512F-SLOW-NEXT: vmovdqa64 %ymm10, %ymm22
; AVX512F-SLOW-NEXT: vpor %ymm1, %ymm2, %ymm1
; AVX512F-SLOW-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVX512F-SLOW-NEXT: vmovdqa 32(%rcx), %ymm2
@@ -10780,7 +10779,7 @@ define void @store_i16_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512F-SLOW-NEXT: vmovdqu64 %zmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; AVX512F-SLOW-NEXT: vbroadcasti128 {{.*#+}} ymm12 = [22,23,26,27,0,0,24,25,26,27,0,0,26,27,26,27,22,23,26,27,0,0,24,25,26,27,0,0,26,27,26,27]
; AVX512F-SLOW-NEXT: # ymm12 = mem[0,1,0,1]
-; AVX512F-SLOW-NEXT: vmovdqa64 %ymm19, %ymm14
+; AVX512F-SLOW-NEXT: vmovdqa64 %ymm18, %ymm14
; AVX512F-SLOW-NEXT: vpshufb %ymm12, %ymm14, %ymm10
; AVX512F-SLOW-NEXT: vmovdqa64 %ymm12, %ymm19
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm12 = ymm9[2,2,2,2,6,6,6,6]
@@ -10814,54 +10813,53 @@ define void @store_i16_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm9, %zmm16, %zmm8
; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm7, %zmm17, %zmm7
; AVX512F-SLOW-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm8, %zmm7
-; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm17 = [65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535]
-; AVX512F-SLOW-NEXT: vmovdqa 96(%r8), %ymm12
+; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm26 = [65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535]
+; AVX512F-SLOW-NEXT: vmovdqa 96(%r8), %ymm10
+; AVX512F-SLOW-NEXT: vmovdqu %ymm10, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512F-SLOW-NEXT: vpshufb {{.*#+}} ymm8 = ymm10[u,u],zero,zero,zero,zero,zero,zero,zero,zero,ymm10[14,15,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero,ymm10[16,17,u,u,u,u],zero,zero
+; AVX512F-SLOW-NEXT: vpternlogq $248, %ymm26, %ymm7, %ymm8
+; AVX512F-SLOW-NEXT: vmovdqa 96(%r9), %ymm12
; AVX512F-SLOW-NEXT: vmovdqu %ymm12, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX512F-SLOW-NEXT: vpshufb {{.*#+}} ymm8 = ymm12[u,u],zero,zero,zero,zero,zero,zero,zero,zero,ymm12[14,15,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero,ymm12[16,17,u,u,u,u],zero,zero
-; AVX512F-SLOW-NEXT: vpternlogq $248, %ymm17, %ymm7, %ymm8
-; AVX512F-SLOW-NEXT: vmovdqa 96(%r9), %ymm14
-; AVX512F-SLOW-NEXT: vmovdqu %ymm14, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX512F-SLOW-NEXT: vpshufb %ymm3, %ymm14, %ymm9
-; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm10 = [65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535]
-; AVX512F-SLOW-NEXT: vpternlogq $248, %ymm10, %ymm8, %ymm9
+; AVX512F-SLOW-NEXT: vpshufb %ymm3, %ymm12, %ymm9
+; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm14 = [65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535]
+; AVX512F-SLOW-NEXT: vpternlogq $248, %ymm14, %ymm8, %ymm9
; AVX512F-SLOW-NEXT: vextracti64x4 $1, %zmm7, %ymm7
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm8 = ymm12[0,0,2,1,4,4,6,5]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm8 = ymm10[0,0,2,1,4,4,6,5]
; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm8 = ymm8[2,1,3,3]
; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm10 = [65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0]
; AVX512F-SLOW-NEXT: vpternlogq $184, %ymm7, %ymm10, %ymm8
-; AVX512F-SLOW-NEXT: vprold $16, %ymm14, %ymm7
+; AVX512F-SLOW-NEXT: vprold $16, %ymm12, %ymm7
; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm7 = ymm7[2,2,2,2]
-; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm10 = [65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535]
-; AVX512F-SLOW-NEXT: vpternlogq $184, %ymm8, %ymm10, %ymm7
-; AVX512F-SLOW-NEXT: vmovdqa64 %zmm10, %zmm16
+; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm14 = [65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535]
+; AVX512F-SLOW-NEXT: vpternlogq $184, %ymm8, %ymm14, %ymm7
; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm7, %zmm0, %zmm7
; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm7 = zmm9[0,1,2,3],zmm7[4,5,6,7]
; AVX512F-SLOW-NEXT: vmovdqu64 %zmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; AVX512F-SLOW-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; AVX512F-SLOW-NEXT: vbroadcasti64x4 {{.*#+}} zmm8 = [6,5,0,0,7,6,0,7,6,5,0,0,7,6,0,7]
-; AVX512F-SLOW-NEXT: # zmm8 = mem[0,1,2,3,0,1,2,3]
+; AVX512F-SLOW-NEXT: vbroadcasti64x4 {{.*#+}} zmm18 = [6,5,0,0,7,6,0,7,6,5,0,0,7,6,0,7]
+; AVX512F-SLOW-NEXT: # zmm18 = mem[0,1,2,3,0,1,2,3]
; AVX512F-SLOW-NEXT: vmovdqa 96(%rax), %ymm7
-; AVX512F-SLOW-NEXT: vpermd %zmm7, %zmm8, %zmm9
+; AVX512F-SLOW-NEXT: vpermd %zmm7, %zmm18, %zmm9
; AVX512F-SLOW-NEXT: vmovdqu64 %zmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm9 = ymm7[0,1,1,3,4,5,5,7]
; AVX512F-SLOW-NEXT: vmovdqa {{.*#+}} ymm10 = [12,13,128,128,128,128,128,128,128,128,128,128,128,128,14,15,128,128,128,128,128,128,128,128,128,128,128,128,16,17,128,128]
; AVX512F-SLOW-NEXT: vpshufb %ymm10, %ymm7, %ymm7
; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm9 = ymm9[0,2,2,3]
-; AVX512F-SLOW-NEXT: vpandnq %ymm9, %ymm17, %ymm9
+; AVX512F-SLOW-NEXT: vpandnq %ymm9, %ymm26, %ymm9
; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm9, %zmm7, %zmm7
; AVX512F-SLOW-NEXT: vmovdqu64 %zmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm14 = [65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535]
+; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm16 = [65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535]
; AVX512F-SLOW-NEXT: vpbroadcastd 72(%rax), %ymm7
-; AVX512F-SLOW-NEXT: vpandn %ymm7, %ymm14, %ymm9
+; AVX512F-SLOW-NEXT: vpandnq %ymm7, %ymm16, %ymm9
; AVX512F-SLOW-NEXT: vmovdqa 64(%rax), %ymm7
; AVX512F-SLOW-NEXT: vpshufb %ymm10, %ymm7, %ymm12
; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm12, %zmm9, %zmm9
; AVX512F-SLOW-NEXT: vmovdqu64 %zmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; AVX512F-SLOW-NEXT: vpbroadcastd 8(%rax), %ymm9
-; AVX512F-SLOW-NEXT: vpandn %ymm9, %ymm14, %ymm9
-; AVX512F-SLOW-NEXT: vmovdqa (%rax), %ymm14
-; AVX512F-SLOW-NEXT: vpshufb %ymm10, %ymm14, %ymm12
-; AVX512F-SLOW-NEXT: vmovdqa64 %ymm14, %ymm22
+; AVX512F-SLOW-NEXT: vpandnq %ymm9, %ymm16, %ymm9
+; AVX512F-SLOW-NEXT: vmovdqa (%rax), %ymm8
+; AVX512F-SLOW-NEXT: vpshufb %ymm10, %ymm8, %ymm12
+; AVX512F-SLOW-NEXT: vmovdqa64 %ymm8, %ymm24
; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm12, %zmm9, %zmm9
; AVX512F-SLOW-NEXT: vmovdqu64 %zmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} ymm9 = ymm2[1,2,2,3,4,5,6,7,9,10,10,11,12,13,14,15]
@@ -10878,35 +10876,35 @@ define void @store_i16_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512F-SLOW-NEXT: vpshufb %ymm10, %ymm9, %ymm10
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm12 = ymm9[0,1,1,3,4,5,5,7]
; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm12 = ymm12[0,2,2,3]
-; AVX512F-SLOW-NEXT: vpandnq %ymm12, %ymm17, %ymm12
+; AVX512F-SLOW-NEXT: vpandnq %ymm12, %ymm26, %ymm12
; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm12, %zmm10, %zmm10
; AVX512F-SLOW-NEXT: vmovdqu64 %zmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm10 = ymm13[0,0,2,1,4,4,6,5]
; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} ymm12 = ymm15[1,2,2,3,4,5,6,7,9,10,10,11,12,13,14,15]
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm12 = ymm12[0,0,0,0,4,4,4,4]
-; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm10 = ymm12[0,1,2],ymm10[3],ymm12[4,5],ymm10[6],ymm12[7,8,9,10],ymm10[11],ymm12[12,13],ymm10[14],ymm12[15]
-; AVX512F-SLOW-NEXT: vmovdqu %ymm10, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm8 = ymm12[0,1,2],ymm10[3],ymm12[4,5],ymm10[6],ymm12[7,8,9,10],ymm10[11],ymm12[12,13],ymm10[14],ymm12[15]
+; AVX512F-SLOW-NEXT: vmovdqu %ymm8, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVX512F-SLOW-NEXT: vmovdqa64 %ymm19, %ymm13
; AVX512F-SLOW-NEXT: vpshufb %ymm13, %ymm2, %ymm10
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm12 = ymm1[2,2,2,2,6,6,6,6]
-; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm10 = ymm10[0,1],ymm12[2],ymm10[3,4],ymm12[5],ymm10[6,7,8,9],ymm12[10],ymm10[11,12],ymm12[13],ymm10[14,15]
-; AVX512F-SLOW-NEXT: vmovdqu %ymm10, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm8 = ymm10[0,1],ymm12[2],ymm10[3,4],ymm12[5],ymm10[6,7,8,9],ymm12[10],ymm10[11,12],ymm12[13],ymm10[14,15]
+; AVX512F-SLOW-NEXT: vmovdqu %ymm8, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[3,3,3,3,7,7,7,7]
; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm2 = ymm2[0,1,2,3,7,6,6,7,8,9,10,11,15,14,14,15]
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[2,2,2,2,6,6,6,6]
; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2,3],ymm2[4],ymm1[5,6,7,8],ymm2[9],ymm1[10,11],ymm2[12],ymm1[13,14,15]
-; AVX512F-SLOW-NEXT: vmovdqa64 %ymm1, %ymm31
+; AVX512F-SLOW-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm1 = ymm0[0,1,2,3,5,4,6,7,8,9,10,11,13,12,14,15]
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,2,2,2,6,6,6,6]
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm11[2,2,2,2,6,6,6,6]
; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2,3],ymm1[4],ymm2[5,6,7,8],ymm1[9],ymm2[10,11],ymm1[12],ymm2[13,14,15]
-; AVX512F-SLOW-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512F-SLOW-NEXT: vmovdqa64 %ymm1, %ymm31
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm11[3,3,3,3,7,7,7,7]
; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,6,7,8,9,10,11,15,14,14,15]
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5],ymm0[6],ymm1[7,8,9,10],ymm0[11],ymm1[12,13],ymm0[14],ymm1[15]
; AVX512F-SLOW-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX512F-SLOW-NEXT: vpermd %zmm9, %zmm8, %zmm0
+; AVX512F-SLOW-NEXT: vpermd %zmm9, %zmm18, %zmm0
; AVX512F-SLOW-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm5[1,2,3,3,4,5,6,7,9,10,11,11,12,13,14,15]
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,1,4,4,6,5]
@@ -10918,31 +10916,31 @@ define void @store_i16_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm4[2,2,2,2,6,6,6,6]
; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2,3],ymm0[4],ymm1[5,6,7,8],ymm0[9],ymm1[10,11],ymm0[12],ymm1[13,14,15]
; AVX512F-SLOW-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX512F-SLOW-NEXT: vmovdqa64 %ymm18, %ymm2
+; AVX512F-SLOW-NEXT: vmovdqa64 %ymm23, %ymm2
; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm2[1,2,2,3,4,5,6,7,9,10,10,11,12,13,14,15]
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm6[0,1,1,3,4,5,5,7]
; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3,4],ymm0[5],ymm1[6,7,8,9],ymm0[10],ymm1[11,12],ymm0[13],ymm1[14,15]
-; AVX512F-SLOW-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512F-SLOW-NEXT: vmovdqa64 %ymm0, %ymm30
; AVX512F-SLOW-NEXT: vpshufb %ymm13, %ymm2, %ymm0
-; AVX512F-SLOW-NEXT: vmovdqa64 %ymm18, %ymm3
+; AVX512F-SLOW-NEXT: vmovdqa64 %ymm23, %ymm3
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm6[2,2,2,2,6,6,6,6]
; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4],ymm1[5],ymm0[6,7,8,9],ymm1[10],ymm0[11,12],ymm1[13],ymm0[14,15]
; AVX512F-SLOW-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX512F-SLOW-NEXT: vmovdqa64 %ymm20, %ymm8
-; AVX512F-SLOW-NEXT: vprold $16, %ymm20, %ymm0
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm24[1,2,2,3,5,6,6,7]
+; AVX512F-SLOW-NEXT: vmovdqa64 %ymm21, %ymm8
+; AVX512F-SLOW-NEXT: vprold $16, %ymm21, %ymm0
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm20[1,2,2,3,5,6,6,7]
; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3,4],ymm0[5],ymm1[6,7,8,9],ymm0[10],ymm1[11,12],ymm0[13],ymm1[14,15]
; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} ymm1 = ymm8[1,2,2,3,4,5,6,7,9,10,10,11,12,13,14,15]
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm24[0,0,2,1,4,4,6,5]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm20[0,0,2,1,4,4,6,5]
; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1,2],ymm2[3],ymm1[4,5],ymm2[6],ymm1[7,8,9,10],ymm2[11],ymm1[12,13],ymm2[14],ymm1[15]
; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm10 = [2,2,3,3,10,9,11,10]
; AVX512F-SLOW-NEXT: vpermt2q %zmm0, %zmm10, %zmm1
; AVX512F-SLOW-NEXT: vbroadcasti64x4 {{.*#+}} zmm11 = [0,5,4,0,0,6,5,0,0,5,4,0,0,6,5,0]
; AVX512F-SLOW-NEXT: # zmm11 = mem[0,1,2,3,0,1,2,3]
; AVX512F-SLOW-NEXT: vpermd 64(%rax), %zmm11, %zmm0
-; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm1, %zmm17, %zmm0
+; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm1, %zmm26, %zmm0
; AVX512F-SLOW-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm6[3,3,3,3,7,7,7,7]
; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm1 = ymm3[0,1,2,3,7,6,6,7,8,9,10,11,15,14,14,15]
@@ -10954,61 +10952,61 @@ define void @store_i16_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,2,2,2,6,6,6,6]
; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5],ymm1[6],ymm0[7,8,9,10],ymm1[11],ymm0[12,13],ymm1[14],ymm0[15]
; AVX512F-SLOW-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm24[3,3,3,3,7,7,7,7]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm20[3,3,3,3,7,7,7,7]
; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm1 = ymm8[0,1,2,3,5,6,7,7,8,9,10,11,13,14,15,15]
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,2,2,3,6,6,6,7]
-; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm2 = ymm1[0,1],ymm0[2],ymm1[3,4],ymm0[5],ymm1[6,7,8,9],ymm0[10],ymm1[11,12],ymm0[13],ymm1[14,15]
+; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm3 = ymm1[0,1],ymm0[2],ymm1[3,4],ymm0[5],ymm1[6,7,8,9],ymm0[10],ymm1[11,12],ymm0[13],ymm1[14,15]
; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm12 = [4,5,4,5,4,5,6,7,16,17,16,17,16,17,17,19]
; AVX512F-SLOW-NEXT: vmovdqa 96(%r9), %xmm0
; AVX512F-SLOW-NEXT: vmovdqa 96(%r8), %xmm1
-; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm3[0,1,3,2,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpermt2d %zmm4, %zmm12, %zmm2
+; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm2[0,1,3,2,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpermt2d %zmm4, %zmm12, %zmm3
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm4 = ymm7[2,3,3,3,6,7,7,7]
; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm4 = ymm4[2,1,3,2]
; AVX512F-SLOW-NEXT: vpbroadcastd 96(%rax), %ymm5
; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm5, %zmm4, %zmm4
; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm5 = [65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535]
-; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm2, %zmm5, %zmm4
+; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm3, %zmm5, %zmm4
; AVX512F-SLOW-NEXT: vmovdqa64 %zmm5, %zmm15
; AVX512F-SLOW-NEXT: vmovdqu64 %zmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; AVX512F-SLOW-NEXT: vmovdqa 96(%rsi), %xmm2
+; AVX512F-SLOW-NEXT: vmovdqa 96(%rsi), %xmm3
; AVX512F-SLOW-NEXT: vmovdqa 96(%rdi), %xmm4
-; AVX512F-SLOW-NEXT: vprold $16, %xmm2, %xmm5
+; AVX512F-SLOW-NEXT: vprold $16, %xmm3, %xmm5
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm6 = xmm4[1,1,2,3]
; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} xmm5 = xmm6[0,1],xmm5[2],xmm6[3,4],xmm5[5],xmm6[6,7]
; AVX512F-SLOW-NEXT: vmovdqu %ymm5, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
-; AVX512F-SLOW-NEXT: vmovdqa64 %xmm5, %xmm18
-; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
-; AVX512F-SLOW-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX512F-SLOW-NEXT: vmovdqa 96(%rcx), %xmm2
+; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; AVX512F-SLOW-NEXT: vmovdqa64 %xmm5, %xmm23
+; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
+; AVX512F-SLOW-NEXT: vmovdqa %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512F-SLOW-NEXT: vmovdqa 96(%rcx), %xmm3
; AVX512F-SLOW-NEXT: vmovdqa 96(%rdx), %xmm4
; AVX512F-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm6 = [6,7,4,5,0,0,8,9,6,7,4,5,0,0,8,9]
-; AVX512F-SLOW-NEXT: vpshufb %xmm6, %xmm2, %xmm5
+; AVX512F-SLOW-NEXT: vpshufb %xmm6, %xmm3, %xmm5
; AVX512F-SLOW-NEXT: vmovdqa %xmm6, %xmm7
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm6 = xmm4[1,1,2,2]
; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} xmm5 = xmm6[0],xmm5[1],xmm6[2,3],xmm5[4],xmm6[5,6],xmm5[7]
; AVX512F-SLOW-NEXT: vmovdqu %ymm5, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
+; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; AVX512F-SLOW-NEXT: vmovdqa %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; AVX512F-SLOW-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
+; AVX512F-SLOW-NEXT: vmovdqa %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,5,7,6]
+; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,7,6]
; AVX512F-SLOW-NEXT: vmovdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,4,5,6,7,10,11,8,9,10,11]
; AVX512F-SLOW-NEXT: vpshufb %xmm0, %xmm1, %xmm1
-; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm28 = <16,18,19,19,19,19,u,u,0,1,0,1,2,3,2,3>
-; AVX512F-SLOW-NEXT: vpermt2d %zmm2, %zmm28, %zmm1
+; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm29 = <16,18,19,19,19,19,u,u,0,1,0,1,2,3,2,3>
+; AVX512F-SLOW-NEXT: vpermt2d %zmm2, %zmm29, %zmm1
; AVX512F-SLOW-NEXT: vpbroadcastd 100(%rax), %ymm2
; AVX512F-SLOW-NEXT: vpbroadcastd 104(%rax), %ymm3
; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
-; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm1, %zmm16, %zmm2
+; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm1, %zmm14, %zmm2
; AVX512F-SLOW-NEXT: vmovdqu64 %zmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; AVX512F-SLOW-NEXT: vmovdqa 64(%rcx), %xmm1
; AVX512F-SLOW-NEXT: vmovdqa 64(%rdx), %xmm2
; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; AVX512F-SLOW-NEXT: vmovdqa64 %xmm3, %xmm30
+; AVX512F-SLOW-NEXT: vmovdqa64 %xmm3, %xmm27
; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; AVX512F-SLOW-NEXT: vpshufb %xmm7, %xmm1, %xmm1
; AVX512F-SLOW-NEXT: vmovdqa %xmm7, %xmm8
@@ -11023,37 +11021,37 @@ define void @store_i16_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512F-SLOW-NEXT: vpshufb %xmm0, %xmm5, %xmm5
; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm5 = ymm5[0,0,1,1]
; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm6 = xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; AVX512F-SLOW-NEXT: vmovdqa64 %xmm6, %xmm26
+; AVX512F-SLOW-NEXT: vmovdqa64 %xmm6, %xmm25
; AVX512F-SLOW-NEXT: vprold $16, %xmm4, %xmm4
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2],xmm2[3,4],xmm4[5],xmm2[6,7]
; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,0,2,1]
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm2, %zmm5, %zmm4
-; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0]
-; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm4
-; AVX512F-SLOW-NEXT: vmovdqu64 %zmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm2, %zmm5, %zmm2
+; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm4 = [65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0]
+; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm1, %zmm4, %zmm2
+; AVX512F-SLOW-NEXT: vmovdqu64 %zmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; AVX512F-SLOW-NEXT: vmovdqa 64(%r9), %xmm1
-; AVX512F-SLOW-NEXT: vmovdqa 64(%r8), %xmm4
-; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
-; AVX512F-SLOW-NEXT: vmovdqa64 %xmm5, %xmm16
-; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3]
-; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm4 = xmm1[0,1,2,3,4,5,7,6]
+; AVX512F-SLOW-NEXT: vmovdqa 64(%r8), %xmm2
+; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; AVX512F-SLOW-NEXT: vmovdqa64 %xmm5, %xmm17
+; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm2 = xmm1[0,1,2,3,4,5,7,6]
; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,1,3,2,4,5,6,7]
; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm6 = <0,1,0,1,0,1,1,3,16,18,19,19,19,19,u,u>
-; AVX512F-SLOW-NEXT: vpermt2d %zmm4, %zmm6, %zmm1
-; AVX512F-SLOW-NEXT: vpbroadcastd 64(%rax), %ymm4
+; AVX512F-SLOW-NEXT: vpermt2d %zmm2, %zmm6, %zmm1
+; AVX512F-SLOW-NEXT: vpbroadcastd 64(%rax), %ymm2
; AVX512F-SLOW-NEXT: vpbroadcastd 68(%rax), %ymm5
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm5, %zmm4, %zmm4
+; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm5, %zmm2, %zmm2
; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm9 = [65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535]
-; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm1, %zmm9, %zmm4
-; AVX512F-SLOW-NEXT: vmovdqu64 %zmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm1, %zmm9, %zmm2
+; AVX512F-SLOW-NEXT: vmovdqu64 %zmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; AVX512F-SLOW-NEXT: vmovdqa (%rcx), %xmm1
; AVX512F-SLOW-NEXT: vmovdqa (%rdx), %xmm5
-; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
-; AVX512F-SLOW-NEXT: vmovdqa64 %xmm4, %xmm19
+; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
+; AVX512F-SLOW-NEXT: vmovdqa64 %xmm2, %xmm20
; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3]
; AVX512F-SLOW-NEXT: vpshufb %xmm8, %xmm1, %xmm1
-; AVX512F-SLOW-NEXT: vmovdqa64 %xmm8, %xmm29
+; AVX512F-SLOW-NEXT: vmovdqa %xmm8, %xmm14
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[1,1,2,2]
; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} xmm1 = xmm5[0],xmm1[1],xmm5[2,3],xmm1[4],xmm5[5,6],xmm1[7]
; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm7[0,1,3,2,4,5,6,7]
@@ -11063,150 +11061,153 @@ define void @store_i16_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3]
; AVX512F-SLOW-NEXT: vpshufb %xmm0, %xmm7, %xmm7
; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm7 = ymm7[0,0,1,1]
-; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm14 = xmm5[4],xmm3[4],xmm5[5],xmm3[5],xmm5[6],xmm3[6],xmm5[7],xmm3[7]
+; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm5[4],xmm3[4],xmm5[5],xmm3[5],xmm5[6],xmm3[6],xmm5[7],xmm3[7]
+; AVX512F-SLOW-NEXT: vmovdqa64 %xmm2, %xmm19
; AVX512F-SLOW-NEXT: vprold $16, %xmm5, %xmm5
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[1,1,2,3]
; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm5[2],xmm3[3,4],xmm5[5],xmm3[6,7]
; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,0,2,1]
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm3, %zmm7, %zmm3
-; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm3
-; AVX512F-SLOW-NEXT: vmovdqu64 %zmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm3, %zmm7, %zmm2
+; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm1, %zmm4, %zmm2
+; AVX512F-SLOW-NEXT: vmovdqu64 %zmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; AVX512F-SLOW-NEXT: vmovdqa (%r9), %xmm1
-; AVX512F-SLOW-NEXT: vmovdqa (%r8), %xmm2
-; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm2 = xmm1[0,1,2,3,4,5,7,6]
+; AVX512F-SLOW-NEXT: vmovdqa (%r8), %xmm3
+; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,5,7,6]
; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,1,3,2,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpermt2d %zmm2, %zmm6, %zmm1
-; AVX512F-SLOW-NEXT: vpbroadcastd (%rax), %ymm2
-; AVX512F-SLOW-NEXT: vpbroadcastd 4(%rax), %ymm3
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
+; AVX512F-SLOW-NEXT: vpermt2d %zmm3, %zmm6, %zmm1
+; AVX512F-SLOW-NEXT: vpbroadcastd (%rax), %ymm3
+; AVX512F-SLOW-NEXT: vpbroadcastd 4(%rax), %ymm4
+; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm2
; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm1, %zmm9, %zmm2
; AVX512F-SLOW-NEXT: vmovdqu64 %zmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; AVX512F-SLOW-NEXT: vmovdqa64 %ymm23, %ymm7
+; AVX512F-SLOW-NEXT: vmovdqa64 %ymm28, %ymm7
; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} ymm1 = ymm7[1,2,3,3,4,5,6,7,9,10,11,11,12,13,14,15]
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,1,4,4,6,5]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm25[1,1,1,1,5,5,5,5]
-; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1],ymm2[2],ymm1[3,4],ymm2[5],ymm1[6,7,8,9],ymm2[10],ymm1[11,12],ymm2[13],ymm1[14,15]
-; AVX512F-SLOW-NEXT: vmovdqa64 %ymm1, %ymm20
+; AVX512F-SLOW-NEXT: vmovdqa64 %ymm22, %ymm2
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm4 = ymm22[1,1,1,1,5,5,5,5]
+; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1],ymm4[2],ymm1[3,4],ymm4[5],ymm1[6,7,8,9],ymm4[10],ymm1[11,12],ymm4[13],ymm1[14,15]
+; AVX512F-SLOW-NEXT: vmovdqa64 %ymm1, %ymm22
; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm1 = ymm7[0,1,2,3,5,4,6,7,8,9,10,11,13,12,14,15]
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,2,2,2,6,6,6,6]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm25[2,2,2,2,6,6,6,6]
-; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2,3],ymm1[4],ymm2[5,6,7,8],ymm1[9],ymm2[10,11],ymm1[12],ymm2[13,14,15]
-; AVX512F-SLOW-NEXT: vmovdqa64 %ymm1, %ymm27
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm4 = ymm2[2,2,2,2,6,6,6,6]
+; AVX512F-SLOW-NEXT: vmovdqa %ymm2, %ymm9
+; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm4[0],ymm1[1],ymm4[2,3],ymm1[4],ymm4[5,6,7,8],ymm1[9],ymm4[10,11],ymm1[12],ymm4[13,14,15]
+; AVX512F-SLOW-NEXT: vmovdqa64 %ymm1, %ymm21
+; AVX512F-SLOW-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
+; AVX512F-SLOW-NEXT: vprold $16, %ymm2, %ymm1
; AVX512F-SLOW-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm3 # 32-byte Reload
-; AVX512F-SLOW-NEXT: vprold $16, %ymm3, %ymm1
-; AVX512F-SLOW-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm5 # 32-byte Reload
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm5[1,2,2,3,5,6,6,7]
-; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0,1],ymm1[2],ymm2[3,4],ymm1[5],ymm2[6,7,8,9],ymm1[10],ymm2[11,12],ymm1[13],ymm2[14,15]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} ymm2 = ymm3[1,2,2,3,4,5,6,7,9,10,10,11,12,13,14,15]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,0,0,0,4,4,4,4]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm6 = ymm5[0,0,2,1,4,4,6,5]
-; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm2 = ymm2[0,1,2],ymm6[3],ymm2[4,5],ymm6[6],ymm2[7,8,9,10],ymm6[11],ymm2[12,13],ymm6[14],ymm2[15]
-; AVX512F-SLOW-NEXT: vpermt2q %zmm1, %zmm10, %zmm2
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm4 = ymm3[1,2,2,3,5,6,6,7]
+; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm4[0,1],ymm1[2],ymm4[3,4],ymm1[5],ymm4[6,7,8,9],ymm1[10],ymm4[11,12],ymm1[13],ymm4[14,15]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} ymm4 = ymm2[1,2,2,3,4,5,6,7,9,10,10,11,12,13,14,15]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm4 = ymm4[0,0,0,0,4,4,4,4]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm6 = ymm3[0,0,2,1,4,4,6,5]
+; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm4 = ymm4[0,1,2],ymm6[3],ymm4[4,5],ymm6[6],ymm4[7,8,9,10],ymm6[11],ymm4[12,13],ymm6[14],ymm4[15]
+; AVX512F-SLOW-NEXT: vpermt2q %zmm1, %zmm10, %zmm4
; AVX512F-SLOW-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm8 # 32-byte Reload
; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} ymm1 = ymm8[1,2,2,3,4,5,6,7,9,10,10,11,12,13,14,15]
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
; AVX512F-SLOW-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm10 # 32-byte Reload
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm6 = ymm10[0,1,1,3,4,5,5,7]
; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm6[0,1],ymm1[2],ymm6[3,4],ymm1[5],ymm6[6,7,8,9],ymm1[10],ymm6[11,12],ymm1[13],ymm6[14,15]
-; AVX512F-SLOW-NEXT: vmovdqa64 %ymm1, %ymm24
+; AVX512F-SLOW-NEXT: vmovdqa64 %ymm1, %ymm16
; AVX512F-SLOW-NEXT: vpshufb %ymm13, %ymm8, %ymm1
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm6 = ymm10[2,2,2,2,6,6,6,6]
; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1],ymm6[2],ymm1[3,4],ymm6[5],ymm1[6,7,8,9],ymm6[10],ymm1[11,12],ymm6[13],ymm1[14,15]
-; AVX512F-SLOW-NEXT: vmovdqa64 %ymm1, %ymm21
+; AVX512F-SLOW-NEXT: vmovdqa64 %ymm1, %ymm18
; AVX512F-SLOW-NEXT: vpermd (%rax), %zmm11, %zmm1
-; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm2, %zmm17, %zmm1
+; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm4, %zmm26, %zmm1
; AVX512F-SLOW-NEXT: vmovdqu64 %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm10[3,3,3,3,7,7,7,7]
-; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm2 = ymm8[0,1,2,3,7,6,6,7,8,9,10,11,15,14,14,15]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[2,2,2,2,6,6,6,6]
-; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2,3],ymm2[4],ymm1[5,6,7,8],ymm2[9],ymm1[10,11],ymm2[12],ymm1[13,14,15]
+; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm4 = ymm8[0,1,2,3,7,6,6,7,8,9,10,11,15,14,14,15]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm4 = ymm4[2,2,2,2,6,6,6,6]
+; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm4[1],ymm1[2,3],ymm4[4],ymm1[5,6,7,8],ymm4[9],ymm1[10,11],ymm4[12],ymm1[13,14,15]
; AVX512F-SLOW-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm25[3,3,3,3,7,7,7,7]
-; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm2 = ymm7[0,1,2,3,7,6,6,7,8,9,10,11,15,14,14,15]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[2,2,2,2,6,6,6,6]
-; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1,2],ymm2[3],ymm1[4,5],ymm2[6],ymm1[7,8,9,10],ymm2[11],ymm1[12,13],ymm2[14],ymm1[15]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm9[3,3,3,3,7,7,7,7]
+; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm4 = ymm7[0,1,2,3,7,6,6,7,8,9,10,11,15,14,14,15]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm4 = ymm4[2,2,2,2,6,6,6,6]
+; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1,2],ymm4[3],ymm1[4,5],ymm4[6],ymm1[7,8,9,10],ymm4[11],ymm1[12,13],ymm4[14],ymm1[15]
; AVX512F-SLOW-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm5[3,3,3,3,7,7,7,7]
-; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm2 = ymm3[0,1,2,3,5,6,7,7,8,9,10,11,13,14,15,15]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[2,2,2,3,6,6,6,7]
-; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0,1],ymm1[2],ymm2[3,4],ymm1[5],ymm2[6,7,8,9],ymm1[10],ymm2[11,12],ymm1[13],ymm2[14,15]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm3[3,3,3,3,7,7,7,7]
+; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm4 = ymm2[0,1,2,3,5,6,7,7,8,9,10,11,13,14,15,15]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm4 = ymm4[2,2,2,3,6,6,6,7]
+; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm4[0,1],ymm1[2],ymm4[3,4],ymm1[5],ymm4[6,7,8,9],ymm1[10],ymm4[11,12],ymm1[13],ymm4[14,15]
; AVX512F-SLOW-NEXT: vmovdqa 32(%r9), %xmm7
-; AVX512F-SLOW-NEXT: vmovdqa 32(%r8), %xmm2
-; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm2[0],xmm7[0],xmm2[1],xmm7[1],xmm2[2],xmm7[2],xmm2[3],xmm7[3]
+; AVX512F-SLOW-NEXT: vmovdqa 32(%r8), %xmm4
+; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm4[0],xmm7[0],xmm4[1],xmm7[1],xmm4[2],xmm7[2],xmm4[3],xmm7[3]
; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm11 = xmm6[0,1,3,2,4,5,6,7]
; AVX512F-SLOW-NEXT: vpermt2d %zmm11, %zmm12, %zmm1
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm11 = ymm22[2,3,3,3,6,7,7,7]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm11 = ymm24[2,3,3,3,6,7,7,7]
; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm11 = ymm11[2,1,3,2]
; AVX512F-SLOW-NEXT: vpbroadcastd 32(%rax), %ymm12
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm12, %zmm11, %zmm25
-; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm1, %zmm15, %zmm25
+; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm12, %zmm11, %zmm26
+; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm1, %zmm15, %zmm26
; AVX512F-SLOW-NEXT: vmovdqa 32(%rdi), %xmm1
; AVX512F-SLOW-NEXT: vmovdqa 32(%rsi), %xmm12
; AVX512F-SLOW-NEXT: vprold $16, %xmm12, %xmm15
; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm13 = xmm1[1,1,2,3]
-; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} xmm3 = xmm13[0,1],xmm15[2],xmm13[3,4],xmm15[5],xmm13[6,7]
-; AVX512F-SLOW-NEXT: vmovdqu %ymm3, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} xmm2 = xmm13[0,1],xmm15[2],xmm13[3,4],xmm15[5],xmm13[6,7]
+; AVX512F-SLOW-NEXT: vmovdqu %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm13 = xmm1[0],xmm12[0],xmm1[1],xmm12[1],xmm1[2],xmm12[2],xmm1[3],xmm12[3]
; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm12[4],xmm1[4],xmm12[5],xmm1[5],xmm12[6],xmm1[6],xmm12[7],xmm1[7]
; AVX512F-SLOW-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm2[4],xmm7[4],xmm2[5],xmm7[5],xmm2[6],xmm7[6],xmm2[7],xmm7[7]
-; AVX512F-SLOW-NEXT: vmovdqa64 %xmm16, %xmm1
+; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm4[4],xmm7[4],xmm4[5],xmm7[5],xmm4[6],xmm7[6],xmm4[7],xmm7[7]
+; AVX512F-SLOW-NEXT: vmovdqa64 %xmm17, %xmm1
+; AVX512F-SLOW-NEXT: vpshufb %xmm0, %xmm1, %xmm2
+; AVX512F-SLOW-NEXT: vpshufb %xmm0, %xmm5, %xmm3
+; AVX512F-SLOW-NEXT: vmovdqa64 %xmm23, %xmm1
; AVX512F-SLOW-NEXT: vpshufb %xmm0, %xmm1, %xmm1
-; AVX512F-SLOW-NEXT: vpshufb %xmm0, %xmm4, %xmm3
-; AVX512F-SLOW-NEXT: vmovdqa64 %xmm18, %xmm4
+; AVX512F-SLOW-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512F-SLOW-NEXT: vpshufb %xmm0, %xmm13, %xmm1
+; AVX512F-SLOW-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVX512F-SLOW-NEXT: vpshufb %xmm0, %xmm4, %xmm4
-; AVX512F-SLOW-NEXT: vmovdqu %ymm4, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX512F-SLOW-NEXT: vpshufb %xmm0, %xmm13, %xmm4
-; AVX512F-SLOW-NEXT: vmovdqu %ymm4, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX512F-SLOW-NEXT: vpshufb %xmm0, %xmm2, %xmm2
-; AVX512F-SLOW-NEXT: vpermq $234, {{[-0-9]+}}(%r{{[sb]}}p), %ymm23 # 32-byte Folded Reload
-; AVX512F-SLOW-NEXT: # ymm23 = mem[2,2,2,3]
-; AVX512F-SLOW-NEXT: vpermq $232, {{[-0-9]+}}(%r{{[sb]}}p), %ymm22 # 32-byte Folded Reload
-; AVX512F-SLOW-NEXT: # ymm22 = mem[0,2,2,3]
-; AVX512F-SLOW-NEXT: vpermq $232, {{[-0-9]+}}(%r{{[sb]}}p), %ymm18 # 32-byte Folded Reload
-; AVX512F-SLOW-NEXT: # ymm18 = mem[0,2,2,3]
+; AVX512F-SLOW-NEXT: vpermq $234, {{[-0-9]+}}(%r{{[sb]}}p), %ymm24 # 32-byte Folded Reload
+; AVX512F-SLOW-NEXT: # ymm24 = mem[2,2,2,3]
+; AVX512F-SLOW-NEXT: vpermq $232, {{[-0-9]+}}(%r{{[sb]}}p), %ymm23 # 32-byte Folded Reload
+; AVX512F-SLOW-NEXT: # ymm23 = mem[0,2,2,3]
+; AVX512F-SLOW-NEXT: vpermq $232, {{[-0-9]+}}(%r{{[sb]}}p), %ymm7 # 32-byte Folded Reload
+; AVX512F-SLOW-NEXT: # ymm7 = mem[0,2,2,3]
; AVX512F-SLOW-NEXT: vpermq $246, {{[-0-9]+}}(%r{{[sb]}}p), %ymm17 # 32-byte Folded Reload
; AVX512F-SLOW-NEXT: # ymm17 = mem[2,1,3,3]
; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm6[0,1,2,3,4,5,7,6]
-; AVX512F-SLOW-NEXT: vpermt2d %zmm0, %zmm28, %zmm2
+; AVX512F-SLOW-NEXT: vpermt2d %zmm0, %zmm29, %zmm4
; AVX512F-SLOW-NEXT: vpbroadcastd 36(%rax), %ymm0
; AVX512F-SLOW-NEXT: vpbroadcastd 40(%rax), %ymm6
; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm6, %zmm0, %zmm28
-; AVX512F-SLOW-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm28
+; AVX512F-SLOW-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm4, %zmm28
; AVX512F-SLOW-NEXT: vmovdqa 32(%rcx), %xmm6
-; AVX512F-SLOW-NEXT: vmovdqa64 %xmm29, %xmm0
-; AVX512F-SLOW-NEXT: vpshufb %xmm0, %xmm6, %xmm2
+; AVX512F-SLOW-NEXT: vpshufb %xmm14, %xmm6, %xmm4
; AVX512F-SLOW-NEXT: vmovdqa 32(%rdx), %xmm0
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm7 = xmm0[1,1,2,2]
-; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} xmm2 = xmm7[0],xmm2[1],xmm7[2,3],xmm2[4],xmm7[5,6],xmm2[7]
-; AVX512F-SLOW-NEXT: vmovdqu %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3]
-; AVX512F-SLOW-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm12 = xmm0[1,1,2,2]
+; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} xmm1 = xmm12[0],xmm4[1],xmm12[2,3],xmm4[4],xmm12[5,6],xmm4[7]
+; AVX512F-SLOW-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3]
+; AVX512F-SLOW-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7]
; AVX512F-SLOW-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX512F-SLOW-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm4 # 32-byte Reload
-; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm4[0,1,2,3,5,5,7,6,8,9,10,11,13,13,15,14]
+; AVX512F-SLOW-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
+; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm1[0,1,2,3,5,5,7,6,8,9,10,11,13,13,15,14]
; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm29 = ymm0[3,3,3,3]
-; AVX512F-SLOW-NEXT: vpshufd $233, {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Folded Reload
-; AVX512F-SLOW-NEXT: # ymm2 = mem[1,2,2,3,5,6,6,7]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm10 = ymm4[2,1,2,3,6,5,6,7]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} ymm10 = ymm10[0,0,3,3,4,5,6,7,8,8,11,11,12,13,14,15]
-; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm10[2,2,2,2]
-; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,1,1]
-; AVX512F-SLOW-NEXT: vmovdqa64 %xmm30, %xmm4
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm10 = xmm4[0,2,3,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshufd $233, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
+; AVX512F-SLOW-NEXT: # ymm0 = mem[1,2,2,3,5,6,6,7]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm14 = ymm1[2,1,2,3,6,5,6,7]
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} ymm14 = ymm14[0,0,3,3,4,5,6,7,8,8,11,11,12,13,14,15]
+; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm14 = ymm14[2,2,2,2]
+; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,0,1,1]
+; AVX512F-SLOW-NEXT: vmovdqa64 %xmm27, %xmm1
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm10 = xmm1[0,2,3,3,4,5,6,7]
; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm10 = ymm10[0,0,2,1]
-; AVX512F-SLOW-NEXT: vmovdqa64 %xmm26, %xmm4
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm8 = xmm4[2,1,2,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vmovdqa64 %xmm25, %xmm1
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm8 = xmm1[2,1,2,3,4,5,6,7]
; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm8 = xmm8[0,1,2,3,4,5,5,4]
; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm8 = ymm8[0,0,1,3]
; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,0,1,1]
-; AVX512F-SLOW-NEXT: vmovdqa64 %xmm19, %xmm4
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm15 = xmm4[0,2,3,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vmovdqa64 %xmm20, %xmm1
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm15 = xmm1[0,2,3,3,4,5,6,7]
; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm15 = ymm15[0,0,2,1]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm12 = xmm14[2,1,2,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vmovdqa64 %xmm19, %xmm1
+; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm12 = xmm1[2,1,2,3,4,5,6,7]
; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm12 = xmm12[0,1,2,3,4,5,5,4]
; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm12 = ymm12[0,0,1,3]
; AVX512F-SLOW-NEXT: vpermq $182, {{[-0-9]+}}(%r{{[sb]}}p), %ymm4 # 32-byte Folded Reload
@@ -11215,176 +11216,174 @@ define void @store_i16_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512F-SLOW-NEXT: # ymm5 = mem[2,2,2,3]
; AVX512F-SLOW-NEXT: vpermq $250, {{[-0-9]+}}(%r{{[sb]}}p), %ymm6 # 32-byte Folded Reload
; AVX512F-SLOW-NEXT: # ymm6 = mem[2,2,3,3]
-; AVX512F-SLOW-NEXT: vpermpd $234, {{[-0-9]+}}(%r{{[sb]}}p), %ymm7 # 32-byte Folded Reload
-; AVX512F-SLOW-NEXT: # ymm7 = mem[2,2,2,3]
-; AVX512F-SLOW-NEXT: vmovups %zmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; AVX512F-SLOW-NEXT: vpermpd $234, {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Folded Reload
+; AVX512F-SLOW-NEXT: # ymm1 = mem[2,2,2,3]
+; AVX512F-SLOW-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; AVX512F-SLOW-NEXT: vpermpd $232, {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Folded Reload
+; AVX512F-SLOW-NEXT: # ymm1 = mem[0,2,2,3]
+; AVX512F-SLOW-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm31 = ymm31[0,2,2,3]
-; AVX512F-SLOW-NEXT: vpermq $232, {{[-0-9]+}}(%r{{[sb]}}p), %ymm30 # 32-byte Folded Reload
-; AVX512F-SLOW-NEXT: # ymm30 = mem[0,2,2,3]
-; AVX512F-SLOW-NEXT: vpermq $246, {{[-0-9]+}}(%r{{[sb]}}p), %ymm26 # 32-byte Folded Reload
-; AVX512F-SLOW-NEXT: # ymm26 = mem[2,1,3,3]
-; AVX512F-SLOW-NEXT: vpermq $234, {{[-0-9]+}}(%r{{[sb]}}p), %ymm19 # 32-byte Folded Reload
-; AVX512F-SLOW-NEXT: # ymm19 = mem[2,2,2,3]
+; AVX512F-SLOW-NEXT: vpermq $246, {{[-0-9]+}}(%r{{[sb]}}p), %ymm27 # 32-byte Folded Reload
+; AVX512F-SLOW-NEXT: # ymm27 = mem[2,1,3,3]
+; AVX512F-SLOW-NEXT: vpermq $234, {{[-0-9]+}}(%r{{[sb]}}p), %ymm20 # 32-byte Folded Reload
+; AVX512F-SLOW-NEXT: # ymm20 = mem[2,2,2,3]
; AVX512F-SLOW-NEXT: vpermq $232, {{[-0-9]+}}(%r{{[sb]}}p), %ymm11 # 32-byte Folded Reload
; AVX512F-SLOW-NEXT: # ymm11 = mem[0,2,2,3]
-; AVX512F-SLOW-NEXT: vpermq $182, {{[-0-9]+}}(%r{{[sb]}}p), %ymm16 # 32-byte Folded Reload
-; AVX512F-SLOW-NEXT: # ymm16 = mem[2,1,3,2]
-; AVX512F-SLOW-NEXT: vpermq $234, {{[-0-9]+}}(%r{{[sb]}}p), %ymm7 # 32-byte Folded Reload
-; AVX512F-SLOW-NEXT: # ymm7 = mem[2,2,2,3]
-; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm20 = ymm20[2,2,2,3]
-; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm13 = ymm27[0,2,2,3]
-; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm14 = ymm24[2,1,3,2]
-; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm9 = ymm21[2,2,2,3]
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm22, %zmm23, %zmm22
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm17, %zmm18, %zmm23
-; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm27 = [65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535]
-; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm22, %zmm27, %zmm23
-; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,1,3,2]
-; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm17 = [65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535]
-; AVX512F-SLOW-NEXT: vpternlogq $184, %ymm23, %ymm17, %ymm2
-; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm17 = [65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535]
-; AVX512F-SLOW-NEXT: vpternlogq $184, %ymm2, %ymm17, %ymm0
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm2 # 32-byte Folded Reload
-; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm1[0,1,2,3],zmm2[4,5,6,7]
-; AVX512F-SLOW-NEXT: vpternlogq $234, {{[-0-9]+}}(%r{{[sb]}}p), %zmm17, %zmm2 # 64-byte Folded Reload
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm1 # 32-byte Folded Reload
-; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm3 = zmm3[0,1,2,3],zmm1[4,5,6,7]
-; AVX512F-SLOW-NEXT: vpternlogq $234, {{[-0-9]+}}(%r{{[sb]}}p), %zmm17, %zmm3 # 64-byte Folded Reload
+; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm19 = ymm30[2,1,3,2]
+; AVX512F-SLOW-NEXT: vpermq $234, {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Folded Reload
+; AVX512F-SLOW-NEXT: # ymm1 = mem[2,2,2,3]
+; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm25 = ymm22[2,2,2,3]
+; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm13 = ymm21[0,2,2,3]
+; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm16 = ymm16[2,1,3,2]
+; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm9 = ymm18[2,2,2,3]
+; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm23, %zmm24, %zmm23
+; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm17, %zmm7, %zmm24
+; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm30 = [65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535]
+; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm23, %zmm30, %zmm24
+; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,3,2]
+; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm7 = [65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535]
+; AVX512F-SLOW-NEXT: vpternlogq $184, %ymm24, %ymm7, %ymm0
+; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm18 = [65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535]
+; AVX512F-SLOW-NEXT: vpternlogq $184, %ymm0, %ymm18, %ymm14
+; AVX512F-SLOW-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload
+; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm0[4,5,6,7]
+; AVX512F-SLOW-NEXT: vpternlogq $234, {{[-0-9]+}}(%r{{[sb]}}p), %zmm18, %zmm2 # 64-byte Folded Reload
+; AVX512F-SLOW-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload
+; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm3 = zmm3[0,1,2,3],zmm0[4,5,6,7]
+; AVX512F-SLOW-NEXT: vpternlogq $234, {{[-0-9]+}}(%r{{[sb]}}p), %zmm18, %zmm3 # 64-byte Folded Reload
; AVX512F-SLOW-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm10, %zmm10 # 32-byte Folded Reload
; AVX512F-SLOW-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm8, %zmm8 # 32-byte Folded Reload
-; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm22 = [65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535]
-; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm10, %zmm22, %zmm8
+; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm23 = [65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535]
+; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm10, %zmm23, %zmm8
; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm17 = [0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535]
; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm2, %zmm17, %zmm8
; AVX512F-SLOW-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm15, %zmm2 # 32-byte Folded Reload
; AVX512F-SLOW-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm12, %zmm10 # 32-byte Folded Reload
-; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm2, %zmm22, %zmm10
+; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm2, %zmm23, %zmm10
; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm3, %zmm17, %zmm10
-; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm4, %zmm1, %zmm2
-; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm18
-; AVX512F-SLOW-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm18
-; AVX512F-SLOW-NEXT: vextracti64x4 $1, %zmm23, %ymm2
-; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535]
-; AVX512F-SLOW-NEXT: vpternlogd $226, 124(%r8){1to8}, %ymm1, %ymm2
+; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm2
+; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm21
+; AVX512F-SLOW-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm21
+; AVX512F-SLOW-NEXT: vextracti64x4 $1, %zmm24, %ymm2
+; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm0 = [65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535]
+; AVX512F-SLOW-NEXT: vpternlogd $226, 124(%r8){1to8}, %ymm0, %ymm2
; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535]
; AVX512F-SLOW-NEXT: vpternlogq $184, %ymm2, %ymm3, %ymm29
-; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm21 # 64-byte Reload
-; AVX512F-SLOW-NEXT: vpternlogq $248, {{[-0-9]+}}(%r{{[sb]}}p), %zmm3, %zmm21 # 64-byte Folded Reload
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm6, %zmm0, %zmm2
-; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
-; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm24 = zmm1[0,1,2,3],zmm2[4,5,6,7]
+; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm22 # 64-byte Reload
+; AVX512F-SLOW-NEXT: vpternlogq $248, {{[-0-9]+}}(%r{{[sb]}}p), %zmm3, %zmm22 # 64-byte Folded Reload
+; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm24 = zmm0[0,1,2,3],zmm6[0,1,2,3]
; AVX512F-SLOW-NEXT: vpternlogq $234, {{[-0-9]+}}(%r{{[sb]}}p), %zmm3, %zmm24 # 64-byte Folded Reload
; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm29, %zmm0, %zmm2
-; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm2[4,5,6,7]
-; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm23 # 64-byte Reload
-; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0]
-; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm0, %zmm1, %zmm23
+; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm14[0,1,2,3],zmm2[4,5,6,7]
; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm29 # 64-byte Reload
-; AVX512F-SLOW-NEXT: vpternlogd $184, {{[-0-9]+}}(%r{{[sb]}}p), %zmm1, %zmm29 # 64-byte Folded Reload
-; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm0 = [65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535]
+; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm0 = [65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0]
+; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm2, %zmm0, %zmm29
+; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm7 # 64-byte Reload
+; AVX512F-SLOW-NEXT: vpternlogd $184, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm7 # 64-byte Folded Reload
+; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535]
+; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 # 64-byte Reload
+; AVX512F-SLOW-NEXT: vpternlogq $184, {{[-0-9]+}}(%r{{[sb]}}p), %zmm2, %zmm18 # 64-byte Folded Reload
; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm17 # 64-byte Reload
-; AVX512F-SLOW-NEXT: vpternlogq $184, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm17 # 64-byte Folded Reload
-; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm6 # 64-byte Reload
-; AVX512F-SLOW-NEXT: vpternlogq $184, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm6 # 64-byte Folded Reload
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm11, %zmm19, %zmm0
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm7, %zmm16, %zmm2
-; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535]
-; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm0, %zmm3, %zmm2
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm13, %zmm20, %zmm0
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm9, %zmm14, %zmm4
-; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm0, %zmm3, %zmm4
-; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm0 = [65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0]
-; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm19 # 64-byte Reload
-; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm2, %zmm0, %zmm19
+; AVX512F-SLOW-NEXT: vpternlogq $184, {{[-0-9]+}}(%r{{[sb]}}p), %zmm2, %zmm17 # 64-byte Folded Reload
+; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm11, %zmm20, %zmm2
+; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm1, %zmm19, %zmm3
+; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm4 = [0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535]
+; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm2, %zmm4, %zmm3
+; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm13, %zmm25, %zmm2
+; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm9, %zmm16, %zmm5
+; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm2, %zmm4, %zmm5
+; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0]
; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm20 # 64-byte Reload
-; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm4, %zmm0, %zmm20
+; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm3, %zmm2, %zmm20
+; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm25 # 64-byte Reload
+; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm5, %zmm2, %zmm25
; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm31, %zmm0, %zmm0
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm26, %zmm30, %zmm2
-; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm0, %zmm27, %zmm2
-; AVX512F-SLOW-NEXT: vpermq $232, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
-; AVX512F-SLOW-NEXT: # ymm0 = mem[0,2,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw $180, {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Folded Reload
-; AVX512F-SLOW-NEXT: # xmm3 = mem[0,1,3,2,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
-; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,1,1,3]
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
-; AVX512F-SLOW-NEXT: vpermq $246, {{[-0-9]+}}(%r{{[sb]}}p), %ymm3 # 32-byte Folded Reload
-; AVX512F-SLOW-NEXT: # ymm3 = mem[2,1,3,3]
-; AVX512F-SLOW-NEXT: vpermq $80, {{[-0-9]+}}(%r{{[sb]}}p), %ymm4 # 32-byte Folded Reload
-; AVX512F-SLOW-NEXT: # ymm4 = mem[0,0,1,1]
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3
-; AVX512F-SLOW-NEXT: vpermq $96, {{[-0-9]+}}(%r{{[sb]}}p), %ymm4 # 32-byte Folded Reload
-; AVX512F-SLOW-NEXT: # ymm4 = mem[0,0,2,1]
-; AVX512F-SLOW-NEXT: vpshuflw $230, {{[-0-9]+}}(%r{{[sb]}}p), %xmm5 # 16-byte Folded Reload
-; AVX512F-SLOW-NEXT: # xmm5 = mem[2,1,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm5 = xmm5[0,1,2,3,4,5,5,4]
-; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm5 = ymm5[0,0,1,3]
-; AVX512F-SLOW-NEXT: vpermq $80, {{[-0-9]+}}(%r{{[sb]}}p), %ymm26 # 32-byte Folded Reload
-; AVX512F-SLOW-NEXT: # ymm26 = mem[0,0,1,1]
-; AVX512F-SLOW-NEXT: vpshuflw $248, {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Folded Reload
-; AVX512F-SLOW-NEXT: # xmm7 = mem[0,2,3,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm7 = ymm7[0,0,2,1]
-; AVX512F-SLOW-NEXT: vpermq $232, {{[-0-9]+}}(%r{{[sb]}}p), %ymm9 # 32-byte Folded Reload
-; AVX512F-SLOW-NEXT: # ymm9 = mem[0,2,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw $180, {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Folded Reload
-; AVX512F-SLOW-NEXT: # xmm11 = mem[0,1,3,2,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm11 = xmm11[0,0,1,1]
-; AVX512F-SLOW-NEXT: vpermq $246, {{[-0-9]+}}(%r{{[sb]}}p), %ymm12 # 32-byte Folded Reload
-; AVX512F-SLOW-NEXT: # ymm12 = mem[2,1,3,3]
-; AVX512F-SLOW-NEXT: vpermq $80, {{[-0-9]+}}(%r{{[sb]}}p), %ymm13 # 32-byte Folded Reload
-; AVX512F-SLOW-NEXT: # ymm13 = mem[0,0,1,1]
-; AVX512F-SLOW-NEXT: vpermq $96, {{[-0-9]+}}(%r{{[sb]}}p), %ymm14 # 32-byte Folded Reload
-; AVX512F-SLOW-NEXT: # ymm14 = mem[0,0,2,1]
-; AVX512F-SLOW-NEXT: vpshuflw $230, {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Folded Reload
-; AVX512F-SLOW-NEXT: # xmm15 = mem[2,1,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm15 = xmm15[0,1,2,3,4,5,5,4]
-; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm15 = ymm15[0,0,1,3]
+; AVX512F-SLOW-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm2 # 32-byte Folded Reload
+; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm27, %zmm31, %zmm3
+; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm2, %zmm30, %zmm3
+; AVX512F-SLOW-NEXT: vpermq $232, {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Folded Reload
+; AVX512F-SLOW-NEXT: # ymm2 = mem[0,2,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw $180, {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 16-byte Folded Reload
+; AVX512F-SLOW-NEXT: # xmm4 = mem[0,1,3,2,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[0,0,1,1]
+; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm4 = ymm4[0,1,1,3]
+; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2
+; AVX512F-SLOW-NEXT: vpermq $246, {{[-0-9]+}}(%r{{[sb]}}p), %ymm4 # 32-byte Folded Reload
+; AVX512F-SLOW-NEXT: # ymm4 = mem[2,1,3,3]
+; AVX512F-SLOW-NEXT: vpermq $80, {{[-0-9]+}}(%r{{[sb]}}p), %ymm5 # 32-byte Folded Reload
+; AVX512F-SLOW-NEXT: # ymm5 = mem[0,0,1,1]
+; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm5, %zmm4, %zmm4
+; AVX512F-SLOW-NEXT: vpermq $96, {{[-0-9]+}}(%r{{[sb]}}p), %ymm5 # 32-byte Folded Reload
+; AVX512F-SLOW-NEXT: # ymm5 = mem[0,0,2,1]
+; AVX512F-SLOW-NEXT: vpshuflw $230, {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Folded Reload
+; AVX512F-SLOW-NEXT: # xmm6 = mem[2,1,2,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm6 = xmm6[0,1,2,3,4,5,5,4]
+; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm6 = ymm6[0,0,1,3]
+; AVX512F-SLOW-NEXT: vpermq $80, {{[-0-9]+}}(%r{{[sb]}}p), %ymm19 # 32-byte Folded Reload
+; AVX512F-SLOW-NEXT: # ymm19 = mem[0,0,1,1]
+; AVX512F-SLOW-NEXT: vpshuflw $248, {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Folded Reload
+; AVX512F-SLOW-NEXT: # xmm9 = mem[0,2,3,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm9 = ymm9[0,0,2,1]
+; AVX512F-SLOW-NEXT: vpermq $232, {{[-0-9]+}}(%r{{[sb]}}p), %ymm11 # 32-byte Folded Reload
+; AVX512F-SLOW-NEXT: # ymm11 = mem[0,2,2,3]
+; AVX512F-SLOW-NEXT: vpshuflw $180, {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Folded Reload
+; AVX512F-SLOW-NEXT: # xmm12 = mem[0,1,3,2,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm12 = xmm12[0,0,1,1]
+; AVX512F-SLOW-NEXT: vpermq $246, {{[-0-9]+}}(%r{{[sb]}}p), %ymm13 # 32-byte Folded Reload
+; AVX512F-SLOW-NEXT: # ymm13 = mem[2,1,3,3]
+; AVX512F-SLOW-NEXT: vpermq $80, {{[-0-9]+}}(%r{{[sb]}}p), %ymm14 # 32-byte Folded Reload
+; AVX512F-SLOW-NEXT: # ymm14 = mem[0,0,1,1]
+; AVX512F-SLOW-NEXT: vpermq $96, {{[-0-9]+}}(%r{{[sb]}}p), %ymm15 # 32-byte Folded Reload
+; AVX512F-SLOW-NEXT: # ymm15 = mem[0,0,2,1]
+; AVX512F-SLOW-NEXT: vpshuflw $230, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512F-SLOW-NEXT: # xmm0 = mem[2,1,2,3,4,5,6,7]
+; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,4]
+; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,3]
; AVX512F-SLOW-NEXT: vpermq $80, {{[-0-9]+}}(%r{{[sb]}}p), %ymm16 # 32-byte Folded Reload
; AVX512F-SLOW-NEXT: # ymm16 = mem[0,0,1,1]
; AVX512F-SLOW-NEXT: vpshuflw $248, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; AVX512F-SLOW-NEXT: # xmm1 = mem[0,2,3,3,4,5,6,7]
; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
-; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm0, %zmm27, %zmm3
-; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm11[0,1,1,3]
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm0, %zmm9, %zmm0
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm13, %zmm12, %zmm9
-; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm0, %zmm27, %zmm9
-; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535]
-; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm11 # 64-byte Reload
-; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm3, %zmm0, %zmm11
-; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm9, %zmm0, %zmm25
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm5, %zmm4, %zmm0
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm7, %zmm26, %zmm3
-; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm0, %zmm22, %zmm3
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm15, %zmm14, %zmm0
+; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm2, %zmm30, %zmm4
+; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm12[0,1,1,3]
+; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm2, %zmm11, %zmm2
+; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm14, %zmm13, %zmm11
+; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm2, %zmm30, %zmm11
+; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535]
+; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm12 # 64-byte Reload
+; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm4, %zmm2, %zmm12
+; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm11, %zmm2, %zmm26
+; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm6, %zmm5, %zmm2
+; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm9, %zmm19, %zmm4
+; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm2, %zmm23, %zmm4
+; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm0, %zmm15, %zmm0
; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm1, %zmm16, %zmm1
-; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm0, %zmm22, %zmm1
+; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm0, %zmm23, %zmm1
; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm0 = [65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0]
-; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm4 # 64-byte Reload
-; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm3, %zmm0, %zmm4
+; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload
+; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm4, %zmm0, %zmm2
; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm1, %zmm0, %zmm28
-; AVX512F-SLOW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm18, %zmm24
-; AVX512F-SLOW-NEXT: vmovdqa64 %zmm29, %zmm0
-; AVX512F-SLOW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
+; AVX512F-SLOW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm21, %zmm24
+; AVX512F-SLOW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm7
; AVX512F-SLOW-NEXT: movq {{[0-9]+}}(%rsp), %rax
; AVX512F-SLOW-NEXT: vmovdqa64 %zmm24, 320(%rax)
; AVX512F-SLOW-NEXT: vmovdqa64 %zmm28, 256(%rax)
-; AVX512F-SLOW-NEXT: vmovdqa64 %zmm25, 192(%rax)
-; AVX512F-SLOW-NEXT: vmovdqa64 %zmm20, 128(%rax)
+; AVX512F-SLOW-NEXT: vmovdqa64 %zmm26, 192(%rax)
+; AVX512F-SLOW-NEXT: vmovdqa64 %zmm25, 128(%rax)
; AVX512F-SLOW-NEXT: vmovdqa64 %zmm10, 64(%rax)
-; AVX512F-SLOW-NEXT: vmovdqa64 %zmm6, (%rax)
-; AVX512F-SLOW-NEXT: vmovdqa64 %zmm17, 448(%rax)
-; AVX512F-SLOW-NEXT: vmovdqa64 %zmm4, 704(%rax)
-; AVX512F-SLOW-NEXT: vmovdqa64 %zmm11, 640(%rax)
-; AVX512F-SLOW-NEXT: vmovdqa64 %zmm19, 576(%rax)
+; AVX512F-SLOW-NEXT: vmovdqa64 %zmm17, (%rax)
+; AVX512F-SLOW-NEXT: vmovdqa64 %zmm18, 448(%rax)
+; AVX512F-SLOW-NEXT: vmovdqa64 %zmm2, 704(%rax)
+; AVX512F-SLOW-NEXT: vmovdqa64 %zmm12, 640(%rax)
+; AVX512F-SLOW-NEXT: vmovdqa64 %zmm20, 576(%rax)
; AVX512F-SLOW-NEXT: vmovdqa64 %zmm8, 512(%rax)
-; AVX512F-SLOW-NEXT: vmovdqa64 %zmm0, 384(%rax)
-; AVX512F-SLOW-NEXT: vmovdqa64 %zmm21, 768(%rax)
-; AVX512F-SLOW-NEXT: vmovdqa64 %zmm23, 832(%rax)
-; AVX512F-SLOW-NEXT: addq $2200, %rsp # imm = 0x898
+; AVX512F-SLOW-NEXT: vmovdqa64 %zmm7, 384(%rax)
+; AVX512F-SLOW-NEXT: vmovdqa64 %zmm22, 768(%rax)
+; AVX512F-SLOW-NEXT: vmovdqa64 %zmm29, 832(%rax)
+; AVX512F-SLOW-NEXT: addq $2168, %rsp # imm = 0x878
; AVX512F-SLOW-NEXT: vzeroupper
; AVX512F-SLOW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll
index d6cd027..b7baf7b 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll
@@ -725,19 +725,17 @@ define void @store_i8_stride7_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vecp
; AVX512BW-SLOW-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero
; AVX512BW-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512BW-SLOW-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[2],ymm3[2]
-; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,2,1,3]
-; AVX512BW-SLOW-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
-; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm1 = zero,zero,zero,zero,zmm1[0,8],zero,zero,zero,zero,zero,zmm1[1,9],zero,zero,zero,zero,zero,zero,zero,zmm1[18],zero,zero,zero,zero,zero,zero,zmm1[19],zero,zero,zero,zero,zmm1[36,44],zero,zero,zero,zero,zero,zmm1[37,45],zero,zero,zero,zero,zero,zmm1[38,46,54],zero,zero,zero,zero,zero,zero,zmm1[55],zero,zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,0,2]
; AVX512BW-SLOW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} zmm1 = zmm0[0,2,1,3,4,6,5,7]
+; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm1 = zero,zero,zero,zero,zmm1[0,8],zero,zero,zero,zero,zero,zmm1[1,9],zero,zero,zero,zero,zero,zero,zero,zmm1[18],zero,zero,zero,zero,zero,zero,zmm1[19],zero,zero,zero,zero,zmm1[36,44],zero,zero,zero,zero,zero,zmm1[37,45],zero,zero,zero,zero,zero,zmm1[38,46,54],zero,zero,zero,zero,zero,zero,zmm1[55],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} zmm0 = zmm0[1,3,0,2,5,7,4,6]
; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zero,zero,zero,zero,zmm0[0],zero,zero,zero,zero,zero,zero,zmm0[1],zero,zero,zero,zero,zmm0[18,26],zero,zero,zero,zero,zero,zmm0[19,27],zero,zero,zero,zero,zero,zero,zero,zmm0[36],zero,zero,zero,zero,zero,zero,zmm0[37],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[55,63],zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX512BW-SLOW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-SLOW-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm1
-; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm1 = zmm1[0,8],zero,zero,zero,zero,zero,zmm1[1,9],zero,zero,zero,zero,zero,zmm1[2,10,18,26],zero,zero,zero,zero,zero,zmm1[19,27],zero,zero,zero,zero,zero,zmm1[20,28],zero,zero,zero,zmm1[37,45],zero,zero,zero,zero,zero,zmm1[38,46],zero,zero,zero,zero,zero,zero,zero,zmm1[55,63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,3,0,1]
-; AVX512BW-SLOW-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
-; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm2 = zero,zero,zmm2[0,8],zero,zero,zero,zero,zero,zmm2[1,9],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm2[19,27],zero,zero,zero,zero,zero,zmm2[20,28],zero,zero,zero,zero,zero,zero,zero,zmm2[37,45],zero,zero,zero,zero,zero,zmm2[38,46],zero,zero,zero,zmm2[55,63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-SLOW-NEXT: vporq %zmm1, %zmm2, %zmm1
+; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm2 = zmm1[0,8],zero,zero,zero,zero,zero,zmm1[1,9],zero,zero,zero,zero,zero,zmm1[2,10,18,26],zero,zero,zero,zero,zero,zmm1[19,27],zero,zero,zero,zero,zero,zmm1[20,28],zero,zero,zero,zmm1[37,45],zero,zero,zero,zero,zero,zmm1[38,46],zero,zero,zero,zero,zero,zero,zero,zmm1[55,63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} zmm1 = zmm1[2,3,0,1,6,7,4,5]
+; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm1 = zero,zero,zmm1[0,8],zero,zero,zero,zero,zero,zmm1[1,9],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm1[19,27],zero,zero,zero,zero,zero,zmm1[20,28],zero,zero,zero,zero,zero,zero,zero,zmm1[37,45],zero,zero,zero,zero,zero,zmm1[38,46],zero,zero,zero,zmm1[55,63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-SLOW-NEXT: vporq %zmm2, %zmm1, %zmm1
; AVX512BW-SLOW-NEXT: movabsq $63546854584629360, %rcx # imm = 0xE1C3870E1C3870
; AVX512BW-SLOW-NEXT: kmovq %rcx, %k1
; AVX512BW-SLOW-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
@@ -7627,17 +7625,15 @@ define void @store_i8_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512F-SLOW-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm5, %zmm4
; AVX512F-SLOW-NEXT: vporq %ymm15, %ymm18, %ymm5
; AVX512F-SLOW-NEXT: vporq %ymm19, %ymm20, %ymm6
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm5
-; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm5 = zmm6[0,1,2,3],zmm5[4,5,6,7]
+; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm5 = zmm6[0,1,2,3],zmm5[0,1,2,3]
; AVX512F-SLOW-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm5, %zmm16
; AVX512F-SLOW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm16
; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
; AVX512F-SLOW-NEXT: vpternlogd $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm26
; AVX512F-SLOW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm17, %zmm26
; AVX512F-SLOW-NEXT: vporq %ymm21, %ymm22, %ymm1
-; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1
; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm5 # 64-byte Reload
-; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm5[0,1,2,3],zmm1[4,5,6,7]
+; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm5[0,1,2,3],zmm1[0,1,2,3]
; AVX512F-SLOW-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm2
; AVX512F-SLOW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm2
; AVX512F-SLOW-NEXT: vpermq $68, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Folded Reload
@@ -7706,9 +7702,9 @@ define void @store_i8_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512F-FAST-NEXT: vmovdqa {{.*#+}} ymm7 = [128,128,128,128,128,14,128,128,128,128,128,128,15,128,128,128,128,128,128,16,128,128,128,128,128,128,17,128,128,128,128,128]
; AVX512F-FAST-NEXT: vpshufb %ymm7, %ymm1, %ymm0
; AVX512F-FAST-NEXT: vmovdqa64 %ymm1, %ymm26
-; AVX512F-FAST-NEXT: vmovdqa (%rdx), %ymm1
-; AVX512F-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,0,1,14,128,14,15,0,1,14,15,128,13,14,15,16,17,16,128,30,31,30,31,16,17,128,31,28,29,30,31]
-; AVX512F-FAST-NEXT: vpshufb %ymm2, %ymm1, %ymm3
+; AVX512F-FAST-NEXT: vmovdqa (%rdx), %ymm2
+; AVX512F-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,0,1,14,128,14,15,0,1,14,15,128,13,14,15,16,17,16,128,30,31,30,31,16,17,128,31,28,29,30,31]
+; AVX512F-FAST-NEXT: vpshufb %ymm1, %ymm2, %ymm3
; AVX512F-FAST-NEXT: vpor %ymm0, %ymm3, %ymm0
; AVX512F-FAST-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVX512F-FAST-NEXT: vmovdqa (%r8), %ymm0
@@ -7753,78 +7749,78 @@ define void @store_i8_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512F-FAST-NEXT: vporq %xmm9, %xmm12, %xmm22
; AVX512F-FAST-NEXT: vpshufb %ymm7, %ymm13, %ymm7
; AVX512F-FAST-NEXT: vmovdqa64 %ymm13, %ymm20
-; AVX512F-FAST-NEXT: vpshufb %ymm2, %ymm14, %ymm2
-; AVX512F-FAST-NEXT: vpor %ymm7, %ymm2, %ymm2
-; AVX512F-FAST-NEXT: vmovdqu64 %zmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; AVX512F-FAST-NEXT: vpshufb %ymm1, %ymm14, %ymm1
+; AVX512F-FAST-NEXT: vpor %ymm7, %ymm1, %ymm1
+; AVX512F-FAST-NEXT: vmovdqu64 %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; AVX512F-FAST-NEXT: vmovdqa64 %ymm16, %ymm7
-; AVX512F-FAST-NEXT: vpshufb {{.*#+}} ymm2 = zero,zero,zero,ymm7[14],zero,zero,zero,zero,zero,zero,ymm7[15],zero,zero,zero,zero,zero,zero,ymm7[16],zero,zero,zero,zero,zero,zero,ymm7[17],zero,zero,zero,zero,zero,zero,ymm7[18]
+; AVX512F-FAST-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,ymm7[14],zero,zero,zero,zero,zero,zero,ymm7[15],zero,zero,zero,zero,zero,zero,ymm7[16],zero,zero,zero,zero,zero,zero,ymm7[17],zero,zero,zero,zero,zero,zero,ymm7[18]
; AVX512F-FAST-NEXT: vmovdqa64 %ymm17, %ymm7
; AVX512F-FAST-NEXT: vpshufb {{.*#+}} ymm7 = ymm7[0,1,14],zero,ymm7[12,13,0,1,14,15],zero,ymm7[3,12,13,2,3,16],zero,ymm7[30,31,28,29,16,17],zero,ymm7[31,18,19,28,29,18],zero
-; AVX512F-FAST-NEXT: vpor %ymm2, %ymm7, %ymm2
-; AVX512F-FAST-NEXT: vmovdqu64 %zmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; AVX512F-FAST-NEXT: vmovdqa64 %ymm18, %ymm2
+; AVX512F-FAST-NEXT: vpor %ymm1, %ymm7, %ymm1
+; AVX512F-FAST-NEXT: vmovdqu64 %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; AVX512F-FAST-NEXT: vmovdqa64 %ymm18, %ymm1
; AVX512F-FAST-NEXT: vmovdqu64 %ymm18, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVX512F-FAST-NEXT: vmovdqa64 %ymm25, %ymm7
-; AVX512F-FAST-NEXT: vpshufb %ymm7, %ymm2, %ymm2
+; AVX512F-FAST-NEXT: vpshufb %ymm7, %ymm1, %ymm1
; AVX512F-FAST-NEXT: vmovdqa64 %ymm30, %ymm0
; AVX512F-FAST-NEXT: vpshufb %ymm0, %ymm4, %ymm0
-; AVX512F-FAST-NEXT: vpor %ymm2, %ymm0, %ymm0
+; AVX512F-FAST-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512F-FAST-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; AVX512F-FAST-NEXT: vmovdqa (%rsi), %xmm13
; AVX512F-FAST-NEXT: vpshufb %xmm11, %xmm13, %xmm0
; AVX512F-FAST-NEXT: vmovdqa (%rdi), %xmm9
-; AVX512F-FAST-NEXT: vpshufb %xmm3, %xmm9, %xmm2
-; AVX512F-FAST-NEXT: vporq %xmm0, %xmm2, %xmm31
+; AVX512F-FAST-NEXT: vpshufb %xmm3, %xmm9, %xmm1
+; AVX512F-FAST-NEXT: vporq %xmm0, %xmm1, %xmm31
; AVX512F-FAST-NEXT: vmovdqa (%rcx), %xmm14
; AVX512F-FAST-NEXT: vpshufb %xmm8, %xmm14, %xmm0
; AVX512F-FAST-NEXT: vmovdqa (%rdx), %xmm8
-; AVX512F-FAST-NEXT: vmovdqa64 %xmm19, %xmm2
-; AVX512F-FAST-NEXT: vpshufb %xmm2, %xmm8, %xmm2
-; AVX512F-FAST-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512F-FAST-NEXT: vmovdqa64 %xmm19, %xmm1
+; AVX512F-FAST-NEXT: vpshufb %xmm1, %xmm8, %xmm1
+; AVX512F-FAST-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX512F-FAST-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX512F-FAST-NEXT: vmovdqa (%r9), %xmm2
-; AVX512F-FAST-NEXT: vpshufb %xmm5, %xmm2, %xmm0
-; AVX512F-FAST-NEXT: vmovdqa %xmm2, %xmm3
-; AVX512F-FAST-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512F-FAST-NEXT: vmovdqa (%r9), %xmm1
+; AVX512F-FAST-NEXT: vpshufb %xmm5, %xmm1, %xmm0
+; AVX512F-FAST-NEXT: vmovdqa %xmm1, %xmm3
+; AVX512F-FAST-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX512F-FAST-NEXT: vmovdqa (%r8), %xmm4
-; AVX512F-FAST-NEXT: vmovdqa64 %xmm21, %xmm2
-; AVX512F-FAST-NEXT: vpshufb %xmm2, %xmm4, %xmm2
+; AVX512F-FAST-NEXT: vmovdqa64 %xmm21, %xmm1
+; AVX512F-FAST-NEXT: vpshufb %xmm1, %xmm4, %xmm1
; AVX512F-FAST-NEXT: vmovdqa %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX512F-FAST-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512F-FAST-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX512F-FAST-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; AVX512F-FAST-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; AVX512F-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u],zero,ymm0[23],zero,zero,zero,zero,ymm0[26],zero,ymm0[24],zero,zero,zero,zero,ymm0[27],zero,ymm0[25]
; AVX512F-FAST-NEXT: vmovdqa64 %ymm23, %ymm12
-; AVX512F-FAST-NEXT: vpshufb {{.*#+}} ymm2 = ymm12[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u],zero,zero,zero,zero,ymm12[21],zero,ymm12[19],zero,zero,zero,zero,ymm12[22],zero,ymm12[20],zero,zero
-; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
+; AVX512F-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm12[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u],zero,zero,zero,zero,ymm12[21],zero,ymm12[19],zero,zero,zero,zero,ymm12[22],zero,ymm12[20],zero,zero
+; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-FAST-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; AVX512F-FAST-NEXT: vmovdqa64 %ymm26, %ymm11
; AVX512F-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm11[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u],zero,zero,ymm11[25],zero,ymm11[23],zero,zero,zero,zero,ymm11[26],zero,ymm11[24],zero,zero,zero,zero
-; AVX512F-FAST-NEXT: vpshufb {{.*#+}} ymm2 = ymm11[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u],zero,ymm11[18],zero,zero,zero,zero,ymm11[21],zero,ymm11[19],zero,zero,zero,zero,ymm11[22],zero,ymm11[20]
-; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
+; AVX512F-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm11[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u],zero,ymm11[18],zero,zero,zero,zero,ymm11[21],zero,ymm11[19],zero,zero,zero,zero,ymm11[22],zero,ymm11[20]
+; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-FAST-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; AVX512F-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [24,25,128,23,128,21,22,23,26,128,24,128,28,29,26,27,24,25,128,23,128,21,22,23,26,128,24,128,28,29,26,27]
-; AVX512F-FAST-NEXT: # ymm2 = mem[0,1,0,1]
+; AVX512F-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [24,25,128,23,128,21,22,23,26,128,24,128,28,29,26,27,24,25,128,23,128,21,22,23,26,128,24,128,28,29,26,27]
+; AVX512F-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX512F-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [18,128,18,19,20,21,128,19,128,25,26,27,22,128,20,128,18,128,18,19,20,21,128,19,128,25,26,27,22,128,20,128]
; AVX512F-FAST-NEXT: # ymm5 = mem[0,1,0,1]
-; AVX512F-FAST-NEXT: vpshufb %ymm2, %ymm1, %ymm0
-; AVX512F-FAST-NEXT: vmovdqa64 %ymm2, %ymm19
-; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm1, %ymm2
+; AVX512F-FAST-NEXT: vpshufb %ymm1, %ymm2, %ymm0
+; AVX512F-FAST-NEXT: vmovdqa64 %ymm1, %ymm19
+; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm2, %ymm1
; AVX512F-FAST-NEXT: vmovdqa64 %ymm5, %ymm30
-; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
+; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-FAST-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm24, %zmm0, %zmm0
-; AVX512F-FAST-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm4[8],xmm3[8],xmm4[9],xmm3[9],xmm4[10],xmm3[10],xmm4[11],xmm3[11],xmm4[12],xmm3[12],xmm4[13],xmm3[13],xmm4[14],xmm3[14],xmm4[15],xmm3[15]
+; AVX512F-FAST-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm4[8],xmm3[8],xmm4[9],xmm3[9],xmm4[10],xmm3[10],xmm4[11],xmm3[11],xmm4[12],xmm3[12],xmm4[13],xmm3[13],xmm4[14],xmm3[14],xmm4[15],xmm3[15]
; AVX512F-FAST-NEXT: vmovdqa {{.*#+}} xmm3 = <u,6,7,2,3,u,u,u,8,9,4,5,u,u,u,10>
-; AVX512F-FAST-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm2[0,1,0,1],zmm0[4,5,6,7]
+; AVX512F-FAST-NEXT: vpshufb %xmm3, %xmm1, %xmm1
+; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,0,1],zmm0[4,5,6,7]
; AVX512F-FAST-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; AVX512F-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [2,2,3,3,2,2,3,3]
-; AVX512F-FAST-NEXT: # ymm2 = mem[0,1,0,1]
+; AVX512F-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [2,2,3,3,2,2,3,3]
+; AVX512F-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX512F-FAST-NEXT: vmovdqa (%rax), %xmm0
; AVX512F-FAST-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX512F-FAST-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,6]
-; AVX512F-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0
+; AVX512F-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512F-FAST-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512F-FAST-NEXT: vmovdqa (%rax), %ymm4
; AVX512F-FAST-NEXT: vmovdqu %ymm4, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -7832,52 +7828,52 @@ define void @store_i8_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm4, %ymm4
; AVX512F-FAST-NEXT: vmovdqa64 %ymm5, %ymm18
; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm24
-; AVX512F-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u],zero,zero,zero,zero,ymm1[30],zero,ymm1[28],zero,zero,zero,zero,ymm1[31],zero,ymm1[29],zero,zero
+; AVX512F-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm2[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u],zero,zero,zero,zero,ymm2[30],zero,ymm2[28],zero,zero,zero,zero,ymm2[31],zero,ymm2[29],zero,zero
; AVX512F-FAST-NEXT: vmovdqa64 %ymm0, %ymm23
-; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} ymm1 = [13,12,11,0,0,0,15,14,13,12,11,0,0,0,15,14,13,12,11,0,0,0,15,14,13,12,11,0,0,0,15,14]
+; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} ymm2 = [13,12,11,0,0,0,15,14,13,12,11,0,0,0,15,14,13,12,11,0,0,0,15,14,13,12,11,0,0,0,15,14]
; AVX512F-FAST-NEXT: vmovdqu (%rsp), %ymm0 # 32-byte Reload
-; AVX512F-FAST-NEXT: vpshufb %ymm1, %ymm0, %ymm0
-; AVX512F-FAST-NEXT: vmovdqa64 %ymm1, %ymm25
+; AVX512F-FAST-NEXT: vpshufb %ymm2, %ymm0, %ymm0
+; AVX512F-FAST-NEXT: vmovdqa64 %ymm2, %ymm25
; AVX512F-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
-; AVX512F-FAST-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
-; AVX512F-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u],zero,ymm1[27],zero,zero,zero,zero,ymm1[30],zero,ymm1[28],zero,zero,zero,zero,ymm1[31],zero,ymm1[29]
-; AVX512F-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
+; AVX512F-FAST-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
+; AVX512F-FAST-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u],zero,ymm2[27],zero,zero,zero,zero,ymm2[30],zero,ymm2[28],zero,zero,zero,zero,ymm2[31],zero,ymm2[29]
+; AVX512F-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,3,2,3]
; AVX512F-FAST-NEXT: vbroadcasti32x4 {{.*#+}} ymm26 = [18374967954648269055,71777218572844800,18374967954648269055,71777218572844800]
; AVX512F-FAST-NEXT: # ymm26 = mem[0,1,2,3,0,1,2,3]
-; AVX512F-FAST-NEXT: vpternlogq $248, %ymm26, %ymm0, %ymm1
+; AVX512F-FAST-NEXT: vpternlogq $248, %ymm26, %ymm0, %ymm2
; AVX512F-FAST-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm10[0],xmm15[0],xmm10[1],xmm15[1],xmm10[2],xmm15[2],xmm10[3],xmm15[3],xmm10[4],xmm15[4],xmm10[5],xmm15[5],xmm10[6],xmm15[6],xmm10[7],xmm15[7]
; AVX512F-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,4,5,0,1,u,u,u,6,7,2,3,u,u,u]
-; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm21 = zmm1[0,1,2,3],zmm0[0,1,0,1]
+; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm21 = zmm2[0,1,2,3],zmm0[0,1,0,1]
; AVX512F-FAST-NEXT: vmovdqa64 %xmm29, %xmm0
-; AVX512F-FAST-NEXT: vmovdqa64 %xmm28, %xmm1
-; AVX512F-FAST-NEXT: vpunpcklbw {{.*#+}} xmm7 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX512F-FAST-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX512F-FAST-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm14[8],xmm8[8],xmm14[9],xmm8[9],xmm14[10],xmm8[10],xmm14[11],xmm8[11],xmm14[12],xmm8[12],xmm14[13],xmm8[13],xmm14[14],xmm8[14],xmm14[15],xmm8[15]
+; AVX512F-FAST-NEXT: vmovdqa64 %xmm28, %xmm2
+; AVX512F-FAST-NEXT: vpunpcklbw {{.*#+}} xmm7 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; AVX512F-FAST-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
+; AVX512F-FAST-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm14[8],xmm8[8],xmm14[9],xmm8[9],xmm14[10],xmm8[10],xmm14[11],xmm8[11],xmm14[12],xmm8[12],xmm14[13],xmm8[13],xmm14[14],xmm8[14],xmm14[15],xmm8[15]
; AVX512F-FAST-NEXT: vmovdqa {{.*#+}} xmm4 = <6,3,2,u,u,u,9,8,5,4,u,u,u,11,10,7>
-; AVX512F-FAST-NEXT: vpshufb %xmm4, %xmm1, %xmm1
-; AVX512F-FAST-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512F-FAST-NEXT: vpshufb %xmm4, %xmm2, %xmm2
+; AVX512F-FAST-NEXT: vmovdqu %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVX512F-FAST-NEXT: vpshufb %xmm4, %xmm0, %xmm0
-; AVX512F-FAST-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
-; AVX512F-FAST-NEXT: vinserti32x4 $2, %xmm0, %zmm1, %zmm28
+; AVX512F-FAST-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload
+; AVX512F-FAST-NEXT: vinserti32x4 $2, %xmm0, %zmm2, %zmm28
; AVX512F-FAST-NEXT: vmovdqa64 %xmm27, %xmm0
-; AVX512F-FAST-NEXT: vmovdqa %xmm6, %xmm1
+; AVX512F-FAST-NEXT: vmovdqa %xmm6, %xmm2
; AVX512F-FAST-NEXT: vpunpcklbw {{.*#+}} xmm6 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7]
-; AVX512F-FAST-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
+; AVX512F-FAST-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
; AVX512F-FAST-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm13[8],xmm9[8],xmm13[9],xmm9[9],xmm13[10],xmm9[10],xmm13[11],xmm9[11],xmm13[12],xmm9[12],xmm13[13],xmm9[13],xmm13[14],xmm9[14],xmm13[15],xmm9[15]
; AVX512F-FAST-NEXT: vmovdqa {{.*#+}} xmm5 = <2,u,u,u,9,8,5,4,u,u,u,11,10,7,6,u>
; AVX512F-FAST-NEXT: vpshufb %xmm5, %xmm4, %xmm0
; AVX512F-FAST-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX512F-FAST-NEXT: vpshufb %xmm5, %xmm1, %xmm1
+; AVX512F-FAST-NEXT: vpshufb %xmm5, %xmm2, %xmm2
; AVX512F-FAST-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
-; AVX512F-FAST-NEXT: vinserti32x4 $2, %xmm1, %zmm0, %zmm27
-; AVX512F-FAST-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm10[8],xmm15[8],xmm10[9],xmm15[9],xmm10[10],xmm15[10],xmm10[11],xmm15[11],xmm10[12],xmm15[12],xmm10[13],xmm15[13],xmm10[14],xmm15[14],xmm10[15],xmm15[15]
-; AVX512F-FAST-NEXT: vpshufb %xmm3, %xmm1, %xmm1
-; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm22[0,1,0,1],zmm1[0,1,0,1]
+; AVX512F-FAST-NEXT: vinserti32x4 $2, %xmm2, %zmm0, %zmm27
+; AVX512F-FAST-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm10[8],xmm15[8],xmm10[9],xmm15[9],xmm10[10],xmm15[10],xmm10[11],xmm15[11],xmm10[12],xmm15[12],xmm10[13],xmm15[13],xmm10[14],xmm15[14],xmm10[15],xmm15[15]
+; AVX512F-FAST-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm22[0,1,0,1],zmm2[0,1,0,1]
; AVX512F-FAST-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; AVX512F-FAST-NEXT: vmovdqa 32(%rax), %xmm0
-; AVX512F-FAST-NEXT: vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,5,6]
+; AVX512F-FAST-NEXT: vpshufhw {{.*#+}} xmm2 = xmm0[0,1,2,3,4,5,5,6]
; AVX512F-FAST-NEXT: vmovdqa64 %xmm0, %xmm29
-; AVX512F-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm0
+; AVX512F-FAST-NEXT: vpermd %ymm2, %ymm1, %ymm0
; AVX512F-FAST-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} ymm1 = [11,0,0,0,15,14,13,12,11,0,0,0,15,14,13,12,11,0,0,0,15,14,13,12,11,0,0,0,15,14,13,12]
; AVX512F-FAST-NEXT: vpshufb %ymm1, %ymm11, %ymm5
@@ -8065,9 +8061,8 @@ define void @store_i8_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512F-FAST-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
; AVX512F-FAST-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm8, %zmm0
; AVX512F-FAST-NEXT: vpor %ymm12, %ymm15, %ymm2
-; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2
; AVX512F-FAST-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm7 # 64-byte Reload
-; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm7[0,1,2,3],zmm2[4,5,6,7]
+; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm7[0,1,2,3],zmm2[0,1,2,3]
; AVX512F-FAST-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm16
; AVX512F-FAST-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm5, %zmm16
; AVX512F-FAST-NEXT: vpermq $68, {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Folded Reload
@@ -8079,8 +8074,7 @@ define void @store_i8_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512F-FAST-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm5, %zmm2
; AVX512F-FAST-NEXT: vpor %ymm1, %ymm13, %ymm1
; AVX512F-FAST-NEXT: vpor %ymm11, %ymm14, %ymm5
-; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1
-; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm5[0,1,2,3],zmm1[4,5,6,7]
+; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm5[0,1,2,3],zmm1[0,1,2,3]
; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm4, %zmm20, %zmm4
; AVX512F-FAST-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm4
; AVX512F-FAST-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm9, %zmm4
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll
index 257c9a2..7bb3874 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll
@@ -572,45 +572,45 @@ define void @store_i8_stride8_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vecp
; AVX512F-SLOW-NEXT: vzeroupper
; AVX512F-SLOW-NEXT: retq
;
-; AVX512-FAST-LABEL: store_i8_stride8_vf8:
-; AVX512-FAST: # %bb.0:
-; AVX512-FAST-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; AVX512-FAST-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; AVX512-FAST-NEXT: movq {{[0-9]+}}(%rsp), %r11
-; AVX512-FAST-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; AVX512-FAST-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; AVX512-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512-FAST-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; AVX512-FAST-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
-; AVX512-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; AVX512-FAST-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
-; AVX512-FAST-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero
-; AVX512-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
-; AVX512-FAST-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero
-; AVX512-FAST-NEXT: vmovq {{.*#+}} xmm4 = mem[0],zero
-; AVX512-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
-; AVX512-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512-FAST-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm1
-; AVX512-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [1,3,5,7,1,3,5,7]
-; AVX512-FAST-NEXT: # ymm2 = mem[0,1,0,1]
-; AVX512-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm3
-; AVX512-FAST-NEXT: vmovdqa {{.*#+}} ymm4 = <u,u,u,u,0,4,8,12,u,u,u,u,1,5,9,13,u,u,u,u,2,6,10,14,u,u,u,u,3,7,11,15>
-; AVX512-FAST-NEXT: vpshufb %ymm4, %ymm3, %ymm3
-; AVX512-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm2
-; AVX512-FAST-NEXT: vmovdqa {{.*#+}} ymm5 = <0,4,8,12,u,u,u,u,1,5,9,13,u,u,u,u,2,6,10,14,u,u,u,u,3,7,11,15,u,u,u,u>
-; AVX512-FAST-NEXT: vpshufb %ymm5, %ymm2, %ymm2
-; AVX512-FAST-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0],ymm3[1],ymm2[2],ymm3[3],ymm2[4],ymm3[5],ymm2[6],ymm3[7]
-; AVX512-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,2,4,6,0,2,4,6]
-; AVX512-FAST-NEXT: # ymm3 = mem[0,1,0,1]
-; AVX512-FAST-NEXT: vpermd %ymm1, %ymm3, %ymm1
-; AVX512-FAST-NEXT: vpshufb %ymm4, %ymm1, %ymm1
-; AVX512-FAST-NEXT: vpermd %ymm0, %ymm3, %ymm0
-; AVX512-FAST-NEXT: vpshufb %ymm5, %ymm0, %ymm0
-; AVX512-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
-; AVX512-FAST-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512-FAST-NEXT: vmovdqa64 %zmm0, (%rax)
-; AVX512-FAST-NEXT: vzeroupper
-; AVX512-FAST-NEXT: retq
+; AVX512F-FAST-LABEL: store_i8_stride8_vf8:
+; AVX512F-FAST: # %bb.0:
+; AVX512F-FAST-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; AVX512F-FAST-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; AVX512F-FAST-NEXT: movq {{[0-9]+}}(%rsp), %r11
+; AVX512F-FAST-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX512F-FAST-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX512F-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512F-FAST-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX512F-FAST-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX512F-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX512F-FAST-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX512F-FAST-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero
+; AVX512F-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; AVX512F-FAST-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero
+; AVX512F-FAST-NEXT: vmovq {{.*#+}} xmm4 = mem[0],zero
+; AVX512F-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
+; AVX512F-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-FAST-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm1
+; AVX512F-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [1,3,5,7,1,3,5,7]
+; AVX512F-FAST-NEXT: # ymm2 = mem[0,1,0,1]
+; AVX512F-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm3
+; AVX512F-FAST-NEXT: vmovdqa {{.*#+}} ymm4 = <u,u,u,u,0,4,8,12,u,u,u,u,1,5,9,13,u,u,u,u,2,6,10,14,u,u,u,u,3,7,11,15>
+; AVX512F-FAST-NEXT: vpshufb %ymm4, %ymm3, %ymm3
+; AVX512F-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm2
+; AVX512F-FAST-NEXT: vmovdqa {{.*#+}} ymm5 = <0,4,8,12,u,u,u,u,1,5,9,13,u,u,u,u,2,6,10,14,u,u,u,u,3,7,11,15,u,u,u,u>
+; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm2, %ymm2
+; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0],ymm3[1],ymm2[2],ymm3[3],ymm2[4],ymm3[5],ymm2[6],ymm3[7]
+; AVX512F-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,2,4,6,0,2,4,6]
+; AVX512F-FAST-NEXT: # ymm3 = mem[0,1,0,1]
+; AVX512F-FAST-NEXT: vpermd %ymm1, %ymm3, %ymm1
+; AVX512F-FAST-NEXT: vpshufb %ymm4, %ymm1, %ymm1
+; AVX512F-FAST-NEXT: vpermd %ymm0, %ymm3, %ymm0
+; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm0, %ymm0
+; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-FAST-NEXT: vmovdqa64 %zmm0, (%rax)
+; AVX512F-FAST-NEXT: vzeroupper
+; AVX512F-FAST-NEXT: retq
;
; AVX512BW-SLOW-LABEL: store_i8_stride8_vf8:
; AVX512BW-SLOW: # %bb.0:
@@ -629,32 +629,61 @@ define void @store_i8_stride8_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vecp
; AVX512BW-SLOW-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero
; AVX512BW-SLOW-NEXT: vmovq {{.*#+}} xmm4 = mem[0],zero
; AVX512BW-SLOW-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
+; AVX512BW-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512BW-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512BW-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm1
-; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,u,u,22,30,u,u,u,u,u,u,23,31]
-; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} ymm3 = ymm1[2,3,0,1]
-; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} ymm4 = ymm3[u,u,u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,22,30,u,u,u,u,u,u,23,31,u,u]
-; AVX512BW-SLOW-NEXT: movw $17544, %cx # imm = 0x4488
+; AVX512BW-SLOW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm1 = zmm0[0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u,u,u,36,44,u,u,u,u,u,u,37,45,u,u,u,u,u,u,u,u,54,62,u,u,u,u,u,u,55,63,u,u,u,u]
+; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5]
+; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u,u,u,u,u,u,u,36,44,u,u,u,u,u,u,37,45,u,u,u,u,54,62,u,u,u,u,u,u,55,63,u,u,u,u,u,u]
+; AVX512BW-SLOW-NEXT: movl $287445282, %ecx # imm = 0x11221122
; AVX512BW-SLOW-NEXT: kmovd %ecx, %k1
-; AVX512BW-SLOW-NEXT: vmovdqu16 %ymm4, %ymm2 {%k1}
-; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} ymm4 = ymm0[4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,u,u,22,30,u,u,u,u,u,u,23,31,u,u,u,u]
-; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} ymm5 = ymm0[2,3,0,1]
-; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} ymm6 = ymm5[u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,22,30,u,u,u,u,u,u,23,31,u,u,u,u,u,u]
-; AVX512BW-SLOW-NEXT: movw $4386, %cx # imm = 0x1122
-; AVX512BW-SLOW-NEXT: kmovd %ecx, %k2
-; AVX512BW-SLOW-NEXT: vmovdqu16 %ymm6, %ymm4 {%k2}
-; AVX512BW-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm4[0],ymm2[1],ymm4[2],ymm2[3],ymm4[4],ymm2[5],ymm4[6],ymm2[7]
-; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,u,u,18,26,u,u,u,u,u,u,19,27]
-; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} ymm3 = ymm3[u,u,u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u]
-; AVX512BW-SLOW-NEXT: vmovdqu16 %ymm3, %ymm1 {%k1}
-; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u,u,u]
-; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} ymm3 = ymm5[u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u,u,u,u,u]
-; AVX512BW-SLOW-NEXT: vmovdqu16 %ymm3, %ymm0 {%k2}
-; AVX512BW-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
-; AVX512BW-SLOW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512BW-SLOW-NEXT: vmovdqa64 %zmm0, (%rax)
+; AVX512BW-SLOW-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
+; AVX512BW-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm2[2,3,0,1,2,3,0,1]
+; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[u,u,u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u,u,u,u,u,u,u,36,44,u,u,u,u,u,u,37,45,u,u,u,u,54,62,u,u,u,u,u,u,55,63,u,u]
+; AVX512BW-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3,0,1,2,3]
+; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm2 = zmm2[u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u,u,u,36,44,u,u,u,u,u,u,37,45,u,u,u,u,u,u,u,u,54,62,u,u,u,u,u,u,55,63]
+; AVX512BW-SLOW-NEXT: movl $1149781128, %ecx # imm = 0x44884488
+; AVX512BW-SLOW-NEXT: kmovd %ecx, %k1
+; AVX512BW-SLOW-NEXT: vmovdqu16 %zmm0, %zmm2 {%k1}
+; AVX512BW-SLOW-NEXT: movw $-21846, %cx # imm = 0xAAAA
+; AVX512BW-SLOW-NEXT: kmovd %ecx, %k1
+; AVX512BW-SLOW-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
+; AVX512BW-SLOW-NEXT: vmovdqa64 %zmm1, (%rax)
; AVX512BW-SLOW-NEXT: vzeroupper
; AVX512BW-SLOW-NEXT: retq
+;
+; AVX512BW-FAST-LABEL: store_i8_stride8_vf8:
+; AVX512BW-FAST: # %bb.0:
+; AVX512BW-FAST-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; AVX512BW-FAST-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; AVX512BW-FAST-NEXT: movq {{[0-9]+}}(%rsp), %r11
+; AVX512BW-FAST-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX512BW-FAST-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX512BW-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512BW-FAST-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX512BW-FAST-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX512BW-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX512BW-FAST-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX512BW-FAST-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero
+; AVX512BW-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; AVX512BW-FAST-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero
+; AVX512BW-FAST-NEXT: vmovq {{.*#+}} xmm4 = mem[0],zero
+; AVX512BW-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
+; AVX512BW-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512BW-FAST-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm1
+; AVX512BW-FAST-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1
+; AVX512BW-FAST-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,10,12,14,8,10,12,14,9,11,13,15,9,11,13,15]
+; AVX512BW-FAST-NEXT: vpermd %zmm1, %zmm2, %zmm1
+; AVX512BW-FAST-NEXT: vpshufb {{.*#+}} zmm1 = zmm1[u,u,u,u,0,4,8,12,u,u,u,u,1,5,9,13,u,u,u,u,18,22,26,30,u,u,u,u,19,23,27,31,u,u,u,u,32,36,40,44,u,u,u,u,33,37,41,45,u,u,u,u,50,54,58,62,u,u,u,u,51,55,59,63]
+; AVX512BW-FAST-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,2,4,6,0,2,4,6,1,3,5,7,1,3,5,7]
+; AVX512BW-FAST-NEXT: vpermd %zmm0, %zmm2, %zmm0
+; AVX512BW-FAST-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,4,8,12,u,u,u,u,1,5,9,13,u,u,u,u,18,22,26,30,u,u,u,u,19,23,27,31,u,u,u,u,32,36,40,44,u,u,u,u,33,37,41,45,u,u,u,u,50,54,58,62,u,u,u,u,51,55,59,63,u,u,u,u]
+; AVX512BW-FAST-NEXT: movw $-21846, %cx # imm = 0xAAAA
+; AVX512BW-FAST-NEXT: kmovd %ecx, %k1
+; AVX512BW-FAST-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
+; AVX512BW-FAST-NEXT: vmovdqa64 %zmm0, (%rax)
+; AVX512BW-FAST-NEXT: vzeroupper
+; AVX512BW-FAST-NEXT: retq
%in.vec0 = load <8 x i8>, ptr %in.vecptr0, align 64
%in.vec1 = load <8 x i8>, ptr %in.vecptr1, align 64
%in.vec2 = load <8 x i8>, ptr %in.vecptr2, align 64
@@ -1051,69 +1080,182 @@ define void @store_i8_stride8_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-ONLY-NEXT: vzeroupper
; AVX2-ONLY-NEXT: retq
;
-; AVX512-LABEL: store_i8_stride8_vf16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
-; AVX512-NEXT: vmovdqa (%rdi), %xmm0
-; AVX512-NEXT: vmovdqa (%rdx), %xmm1
-; AVX512-NEXT: vmovdqa (%r8), %xmm2
-; AVX512-NEXT: vmovdqa (%r11), %xmm3
-; AVX512-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0
-; AVX512-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1
-; AVX512-NEXT: vinserti128 $1, (%r9), %ymm2, %ymm2
-; AVX512-NEXT: vinserti128 $1, (%r10), %ymm3, %ymm3
-; AVX512-NEXT: vpermq {{.*#+}} ymm4 = ymm3[0,2,0,2]
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm5 = <u,u,u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15>
-; AVX512-NEXT: vpshufb %ymm5, %ymm4, %ymm6
-; AVX512-NEXT: vpermq {{.*#+}} ymm7 = ymm2[0,2,0,2]
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm8 = <u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15,u,u>
-; AVX512-NEXT: vpshufb %ymm8, %ymm7, %ymm9
-; AVX512-NEXT: vpblendw {{.*#+}} ymm6 = ymm9[0,1,2],ymm6[3],ymm9[4,5,6],ymm6[7],ymm9[8,9,10],ymm6[11],ymm9[12,13,14],ymm6[15]
-; AVX512-NEXT: vpermq {{.*#+}} ymm9 = ymm1[0,2,0,2]
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm10 = <u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15,u,u,u,u>
-; AVX512-NEXT: vpshufb %ymm10, %ymm9, %ymm11
-; AVX512-NEXT: vpermq {{.*#+}} ymm12 = ymm0[0,2,0,2]
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm13 = <4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15,u,u,u,u,u,u>
-; AVX512-NEXT: vpshufb %ymm13, %ymm12, %ymm14
-; AVX512-NEXT: vpblendw {{.*#+}} ymm11 = ymm14[0],ymm11[1],ymm14[2,3,4],ymm11[5],ymm14[6,7,8],ymm11[9],ymm14[10,11,12],ymm11[13],ymm14[14,15]
-; AVX512-NEXT: vpblendd {{.*#+}} ymm6 = ymm11[0],ymm6[1],ymm11[2],ymm6[3],ymm11[4],ymm6[5],ymm11[6],ymm6[7]
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm11 = <u,u,u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11>
-; AVX512-NEXT: vpshufb %ymm11, %ymm4, %ymm4
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm14 = <u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u>
-; AVX512-NEXT: vpshufb %ymm14, %ymm7, %ymm7
-; AVX512-NEXT: vpblendw {{.*#+}} ymm4 = ymm7[0,1,2],ymm4[3],ymm7[4,5,6],ymm4[7],ymm7[8,9,10],ymm4[11],ymm7[12,13,14],ymm4[15]
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm7 = <u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u,u,u>
-; AVX512-NEXT: vpshufb %ymm7, %ymm9, %ymm9
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm15 = <0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u,u,u,u,u>
-; AVX512-NEXT: vpshufb %ymm15, %ymm12, %ymm12
-; AVX512-NEXT: vpblendw {{.*#+}} ymm9 = ymm12[0],ymm9[1],ymm12[2,3,4],ymm9[5],ymm12[6,7,8],ymm9[9],ymm12[10,11,12],ymm9[13],ymm12[14,15]
-; AVX512-NEXT: vpblendd {{.*#+}} ymm4 = ymm9[0],ymm4[1],ymm9[2],ymm4[3],ymm9[4],ymm4[5],ymm9[6],ymm4[7]
-; AVX512-NEXT: vinserti64x4 $1, %ymm6, %zmm4, %zmm4
-; AVX512-NEXT: vpermq {{.*#+}} ymm3 = ymm3[1,3,1,3]
-; AVX512-NEXT: vpshufb %ymm5, %ymm3, %ymm5
-; AVX512-NEXT: vpermq {{.*#+}} ymm2 = ymm2[1,3,1,3]
-; AVX512-NEXT: vpshufb %ymm8, %ymm2, %ymm6
-; AVX512-NEXT: vpblendw {{.*#+}} ymm5 = ymm6[0,1,2],ymm5[3],ymm6[4,5,6],ymm5[7],ymm6[8,9,10],ymm5[11],ymm6[12,13,14],ymm5[15]
-; AVX512-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,3,1,3]
-; AVX512-NEXT: vpshufb %ymm10, %ymm1, %ymm6
-; AVX512-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,1,3]
-; AVX512-NEXT: vpshufb %ymm13, %ymm0, %ymm8
-; AVX512-NEXT: vpblendw {{.*#+}} ymm6 = ymm8[0],ymm6[1],ymm8[2,3,4],ymm6[5],ymm8[6,7,8],ymm6[9],ymm8[10,11,12],ymm6[13],ymm8[14,15]
-; AVX512-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0],ymm5[1],ymm6[2],ymm5[3],ymm6[4],ymm5[5],ymm6[6],ymm5[7]
-; AVX512-NEXT: vpshufb %ymm11, %ymm3, %ymm3
-; AVX512-NEXT: vpshufb %ymm14, %ymm2, %ymm2
-; AVX512-NEXT: vpblendw {{.*#+}} ymm2 = ymm2[0,1,2],ymm3[3],ymm2[4,5,6],ymm3[7],ymm2[8,9,10],ymm3[11],ymm2[12,13,14],ymm3[15]
-; AVX512-NEXT: vpshufb %ymm7, %ymm1, %ymm1
-; AVX512-NEXT: vpshufb %ymm15, %ymm0, %ymm0
-; AVX512-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3,4],ymm1[5],ymm0[6,7,8],ymm1[9],ymm0[10,11,12],ymm1[13],ymm0[14,15]
-; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7]
-; AVX512-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0
-; AVX512-NEXT: vmovdqa64 %zmm0, 64(%rax)
-; AVX512-NEXT: vmovdqa64 %zmm4, (%rax)
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512F-LABEL: store_i8_stride8_vf16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %r11
+; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512F-NEXT: vmovdqa (%rdx), %xmm1
+; AVX512F-NEXT: vmovdqa (%r8), %xmm2
+; AVX512F-NEXT: vmovdqa (%r11), %xmm3
+; AVX512F-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0
+; AVX512F-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1
+; AVX512F-NEXT: vinserti128 $1, (%r9), %ymm2, %ymm2
+; AVX512F-NEXT: vinserti128 $1, (%r10), %ymm3, %ymm3
+; AVX512F-NEXT: vpermq {{.*#+}} ymm4 = ymm3[0,2,0,2]
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = <u,u,u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15>
+; AVX512F-NEXT: vpshufb %ymm5, %ymm4, %ymm6
+; AVX512F-NEXT: vpermq {{.*#+}} ymm7 = ymm2[0,2,0,2]
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm8 = <u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15,u,u>
+; AVX512F-NEXT: vpshufb %ymm8, %ymm7, %ymm9
+; AVX512F-NEXT: vpblendw {{.*#+}} ymm6 = ymm9[0,1,2],ymm6[3],ymm9[4,5,6],ymm6[7],ymm9[8,9,10],ymm6[11],ymm9[12,13,14],ymm6[15]
+; AVX512F-NEXT: vpermq {{.*#+}} ymm9 = ymm1[0,2,0,2]
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm10 = <u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15,u,u,u,u>
+; AVX512F-NEXT: vpshufb %ymm10, %ymm9, %ymm11
+; AVX512F-NEXT: vpermq {{.*#+}} ymm12 = ymm0[0,2,0,2]
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm13 = <4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15,u,u,u,u,u,u>
+; AVX512F-NEXT: vpshufb %ymm13, %ymm12, %ymm14
+; AVX512F-NEXT: vpblendw {{.*#+}} ymm11 = ymm14[0],ymm11[1],ymm14[2,3,4],ymm11[5],ymm14[6,7,8],ymm11[9],ymm14[10,11,12],ymm11[13],ymm14[14,15]
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm6 = ymm11[0],ymm6[1],ymm11[2],ymm6[3],ymm11[4],ymm6[5],ymm11[6],ymm6[7]
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm11 = <u,u,u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11>
+; AVX512F-NEXT: vpshufb %ymm11, %ymm4, %ymm4
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm14 = <u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u>
+; AVX512F-NEXT: vpshufb %ymm14, %ymm7, %ymm7
+; AVX512F-NEXT: vpblendw {{.*#+}} ymm4 = ymm7[0,1,2],ymm4[3],ymm7[4,5,6],ymm4[7],ymm7[8,9,10],ymm4[11],ymm7[12,13,14],ymm4[15]
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm7 = <u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u,u,u>
+; AVX512F-NEXT: vpshufb %ymm7, %ymm9, %ymm9
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm15 = <0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u,u,u,u,u>
+; AVX512F-NEXT: vpshufb %ymm15, %ymm12, %ymm12
+; AVX512F-NEXT: vpblendw {{.*#+}} ymm9 = ymm12[0],ymm9[1],ymm12[2,3,4],ymm9[5],ymm12[6,7,8],ymm9[9],ymm12[10,11,12],ymm9[13],ymm12[14,15]
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm4 = ymm9[0],ymm4[1],ymm9[2],ymm4[3],ymm9[4],ymm4[5],ymm9[6],ymm4[7]
+; AVX512F-NEXT: vinserti64x4 $1, %ymm6, %zmm4, %zmm4
+; AVX512F-NEXT: vpermq {{.*#+}} ymm3 = ymm3[1,3,1,3]
+; AVX512F-NEXT: vpshufb %ymm5, %ymm3, %ymm5
+; AVX512F-NEXT: vpermq {{.*#+}} ymm2 = ymm2[1,3,1,3]
+; AVX512F-NEXT: vpshufb %ymm8, %ymm2, %ymm6
+; AVX512F-NEXT: vpblendw {{.*#+}} ymm5 = ymm6[0,1,2],ymm5[3],ymm6[4,5,6],ymm5[7],ymm6[8,9,10],ymm5[11],ymm6[12,13,14],ymm5[15]
+; AVX512F-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,3,1,3]
+; AVX512F-NEXT: vpshufb %ymm10, %ymm1, %ymm6
+; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,1,3]
+; AVX512F-NEXT: vpshufb %ymm13, %ymm0, %ymm8
+; AVX512F-NEXT: vpblendw {{.*#+}} ymm6 = ymm8[0],ymm6[1],ymm8[2,3,4],ymm6[5],ymm8[6,7,8],ymm6[9],ymm8[10,11,12],ymm6[13],ymm8[14,15]
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0],ymm5[1],ymm6[2],ymm5[3],ymm6[4],ymm5[5],ymm6[6],ymm5[7]
+; AVX512F-NEXT: vpshufb %ymm11, %ymm3, %ymm3
+; AVX512F-NEXT: vpshufb %ymm14, %ymm2, %ymm2
+; AVX512F-NEXT: vpblendw {{.*#+}} ymm2 = ymm2[0,1,2],ymm3[3],ymm2[4,5,6],ymm3[7],ymm2[8,9,10],ymm3[11],ymm2[12,13,14],ymm3[15]
+; AVX512F-NEXT: vpshufb %ymm7, %ymm1, %ymm1
+; AVX512F-NEXT: vpshufb %ymm15, %ymm0, %ymm0
+; AVX512F-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3,4],ymm1[5],ymm0[6,7,8],ymm1[9],ymm0[10,11,12],ymm1[13],ymm0[14,15]
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7]
+; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0
+; AVX512F-NEXT: vmovdqa64 %zmm0, 64(%rax)
+; AVX512F-NEXT: vmovdqa64 %zmm4, (%rax)
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512BW-SLOW-LABEL: store_i8_stride8_vf16:
+; AVX512BW-SLOW: # %bb.0:
+; AVX512BW-SLOW-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; AVX512BW-SLOW-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; AVX512BW-SLOW-NEXT: movq {{[0-9]+}}(%rsp), %r11
+; AVX512BW-SLOW-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512BW-SLOW-NEXT: vmovdqa (%rdx), %xmm1
+; AVX512BW-SLOW-NEXT: vmovdqa (%r8), %xmm2
+; AVX512BW-SLOW-NEXT: vmovdqa (%r11), %xmm3
+; AVX512BW-SLOW-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1
+; AVX512BW-SLOW-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0
+; AVX512BW-SLOW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1
+; AVX512BW-SLOW-NEXT: vinserti128 $1, (%r10), %ymm3, %ymm3
+; AVX512BW-SLOW-NEXT: vinserti128 $1, (%r9), %ymm2, %ymm2
+; AVX512BW-SLOW-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm3
+; AVX512BW-SLOW-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
+; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} zmm4 = zmm2[0,2,0,2,4,6,4,6]
+; AVX512BW-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm5 = <u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u,u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15,u,u>
+; AVX512BW-SLOW-NEXT: vpshufb %zmm5, %zmm4, %zmm4
+; AVX512BW-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm3 = zmm3[4,5,6,7,4,5,6,7]
+; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} zmm6 = zmm3[0,2,0,2,4,6,4,6]
+; AVX512BW-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm7 = <u,u,u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u,u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15>
+; AVX512BW-SLOW-NEXT: vpshufb %zmm7, %zmm6, %zmm6
+; AVX512BW-SLOW-NEXT: movl $8913032, %ecx # imm = 0x880088
+; AVX512BW-SLOW-NEXT: kmovd %ecx, %k1
+; AVX512BW-SLOW-NEXT: vmovdqu16 %zmm6, %zmm4 {%k1}
+; AVX512BW-SLOW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} zmm6 = zmm0[0,2,0,2,4,6,4,6]
+; AVX512BW-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm8 = <0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u,u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15,u,u,u,u,u,u>
+; AVX512BW-SLOW-NEXT: vpshufb %zmm8, %zmm6, %zmm6
+; AVX512BW-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[4,5,6,7,4,5,6,7]
+; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} zmm9 = zmm1[0,2,0,2,4,6,4,6]
+; AVX512BW-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm10 = <u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u,u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15,u,u,u,u>
+; AVX512BW-SLOW-NEXT: vpshufb %zmm10, %zmm9, %zmm9
+; AVX512BW-SLOW-NEXT: movl $2228258, %ecx # imm = 0x220022
+; AVX512BW-SLOW-NEXT: kmovd %ecx, %k2
+; AVX512BW-SLOW-NEXT: vmovdqu16 %zmm9, %zmm6 {%k2}
+; AVX512BW-SLOW-NEXT: movw $-21846, %cx # imm = 0xAAAA
+; AVX512BW-SLOW-NEXT: kmovd %ecx, %k3
+; AVX512BW-SLOW-NEXT: vmovdqa32 %zmm4, %zmm6 {%k3}
+; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} zmm2 = zmm2[1,3,1,3,5,7,5,7]
+; AVX512BW-SLOW-NEXT: vpshufb %zmm5, %zmm2, %zmm2
+; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} zmm3 = zmm3[1,3,1,3,5,7,5,7]
+; AVX512BW-SLOW-NEXT: vpshufb %zmm7, %zmm3, %zmm3
+; AVX512BW-SLOW-NEXT: vmovdqu16 %zmm3, %zmm2 {%k1}
+; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} zmm0 = zmm0[1,3,1,3,5,7,5,7]
+; AVX512BW-SLOW-NEXT: vpshufb %zmm8, %zmm0, %zmm0
+; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} zmm1 = zmm1[1,3,1,3,5,7,5,7]
+; AVX512BW-SLOW-NEXT: vpshufb %zmm10, %zmm1, %zmm1
+; AVX512BW-SLOW-NEXT: vmovdqu16 %zmm1, %zmm0 {%k2}
+; AVX512BW-SLOW-NEXT: vmovdqa32 %zmm2, %zmm0 {%k3}
+; AVX512BW-SLOW-NEXT: vmovdqa64 %zmm0, 64(%rax)
+; AVX512BW-SLOW-NEXT: vmovdqa64 %zmm6, (%rax)
+; AVX512BW-SLOW-NEXT: vzeroupper
+; AVX512BW-SLOW-NEXT: retq
+;
+; AVX512BW-FAST-LABEL: store_i8_stride8_vf16:
+; AVX512BW-FAST: # %bb.0:
+; AVX512BW-FAST-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; AVX512BW-FAST-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; AVX512BW-FAST-NEXT: movq {{[0-9]+}}(%rsp), %r11
+; AVX512BW-FAST-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512BW-FAST-NEXT: vmovdqa (%rdx), %xmm1
+; AVX512BW-FAST-NEXT: vmovdqa (%r8), %xmm2
+; AVX512BW-FAST-NEXT: vmovdqa (%r11), %xmm3
+; AVX512BW-FAST-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1
+; AVX512BW-FAST-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0
+; AVX512BW-FAST-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm4
+; AVX512BW-FAST-NEXT: vinserti128 $1, (%r10), %ymm3, %ymm3
+; AVX512BW-FAST-NEXT: vinserti128 $1, (%r9), %ymm2, %ymm2
+; AVX512BW-FAST-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm5
+; AVX512BW-FAST-NEXT: vmovdqa64 {{.*#+}} zmm6 = [0,2,0,2,12,14,12,14]
+; AVX512BW-FAST-NEXT: vpermt2q %zmm5, %zmm6, %zmm3
+; AVX512BW-FAST-NEXT: vmovdqa64 {{.*#+}} zmm7 = <u,u,u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u,u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15>
+; AVX512BW-FAST-NEXT: vpshufb %zmm7, %zmm3, %zmm3
+; AVX512BW-FAST-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
+; AVX512BW-FAST-NEXT: vpermq {{.*#+}} zmm8 = zmm2[0,2,0,2,4,6,4,6]
+; AVX512BW-FAST-NEXT: vmovdqa64 {{.*#+}} zmm9 = <u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u,u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15,u,u>
+; AVX512BW-FAST-NEXT: vpshufb %zmm9, %zmm8, %zmm8
+; AVX512BW-FAST-NEXT: movl $8913032, %ecx # imm = 0x880088
+; AVX512BW-FAST-NEXT: kmovd %ecx, %k1
+; AVX512BW-FAST-NEXT: vmovdqu16 %zmm3, %zmm8 {%k1}
+; AVX512BW-FAST-NEXT: vpermt2q %zmm4, %zmm6, %zmm1
+; AVX512BW-FAST-NEXT: vmovdqa64 {{.*#+}} zmm3 = <u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u,u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15,u,u,u,u>
+; AVX512BW-FAST-NEXT: vpshufb %zmm3, %zmm1, %zmm1
+; AVX512BW-FAST-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512BW-FAST-NEXT: vpermq {{.*#+}} zmm6 = zmm0[0,2,0,2,4,6,4,6]
+; AVX512BW-FAST-NEXT: vmovdqa64 {{.*#+}} zmm10 = <0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u,u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15,u,u,u,u,u,u>
+; AVX512BW-FAST-NEXT: vpshufb %zmm10, %zmm6, %zmm6
+; AVX512BW-FAST-NEXT: movl $2228258, %ecx # imm = 0x220022
+; AVX512BW-FAST-NEXT: kmovd %ecx, %k2
+; AVX512BW-FAST-NEXT: vmovdqu16 %zmm1, %zmm6 {%k2}
+; AVX512BW-FAST-NEXT: movw $-21846, %cx # imm = 0xAAAA
+; AVX512BW-FAST-NEXT: kmovd %ecx, %k3
+; AVX512BW-FAST-NEXT: vmovdqa32 %zmm8, %zmm6 {%k3}
+; AVX512BW-FAST-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [5,7,5,7,5,7,5,7]
+; AVX512BW-FAST-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512BW-FAST-NEXT: vpermq %zmm5, %zmm1, %zmm5
+; AVX512BW-FAST-NEXT: vpshufb %zmm7, %zmm5, %zmm5
+; AVX512BW-FAST-NEXT: vpermq {{.*#+}} zmm2 = zmm2[1,3,1,3,5,7,5,7]
+; AVX512BW-FAST-NEXT: vpshufb %zmm9, %zmm2, %zmm2
+; AVX512BW-FAST-NEXT: vmovdqu16 %zmm5, %zmm2 {%k1}
+; AVX512BW-FAST-NEXT: vpermq %zmm4, %zmm1, %zmm1
+; AVX512BW-FAST-NEXT: vpshufb %zmm3, %zmm1, %zmm1
+; AVX512BW-FAST-NEXT: vpermq {{.*#+}} zmm0 = zmm0[1,3,1,3,5,7,5,7]
+; AVX512BW-FAST-NEXT: vpshufb %zmm10, %zmm0, %zmm0
+; AVX512BW-FAST-NEXT: vmovdqu16 %zmm1, %zmm0 {%k2}
+; AVX512BW-FAST-NEXT: vmovdqa32 %zmm2, %zmm0 {%k3}
+; AVX512BW-FAST-NEXT: vmovdqa64 %zmm0, 64(%rax)
+; AVX512BW-FAST-NEXT: vmovdqa64 %zmm6, (%rax)
+; AVX512BW-FAST-NEXT: vzeroupper
+; AVX512BW-FAST-NEXT: retq
%in.vec0 = load <16 x i8>, ptr %in.vecptr0, align 64
%in.vec1 = load <16 x i8>, ptr %in.vecptr1, align 64
%in.vec2 = load <16 x i8>, ptr %in.vecptr2, align 64
@@ -6988,6 +7130,8 @@ define void @store_i8_stride8_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; AVX1: {{.*}}
+; AVX512: {{.*}}
+; AVX512-FAST: {{.*}}
; AVX512-SLOW: {{.*}}
; AVX512BW: {{.*}}
; AVX512BW-ONLY: {{.*}}
@@ -6997,7 +7141,6 @@ define void @store_i8_stride8_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512DQBW-FAST: {{.*}}
; AVX512DQBW-ONLY: {{.*}}
; AVX512DQBW-SLOW: {{.*}}
-; AVX512F: {{.*}}
; AVX512F-ONLY: {{.*}}
; FALLBACK0: {{.*}}
; FALLBACK1: {{.*}}
diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
index 86e8782..a7b1fa2 100644
--- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
+++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
@@ -5684,10 +5684,9 @@ define void @vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32(ptr %in
;
; AVX512BW-LABEL: vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
-; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
+; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
+; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0],zero,zmm0[0],zero,zmm0[0],zero,zmm0[0],zero,zmm0[0],zero,zmm0[0],zero,zmm0[0],zero,zmm0[0],zero,zmm0[16],zero,zmm0[16],zero,zmm0[16],zero,zmm0[16],zero,zmm0[16],zero,zmm0[16],zero,zmm0[16],zero,zmm0[16],zero,zmm0[32],zero,zmm0[32],zero,zmm0[32],zero,zmm0[32],zero,zmm0[32],zero,zmm0[32],zero,zmm0[32],zero,zmm0[32],zero,zmm0[48],zero,zmm0[48],zero,zmm0[48],zero,zmm0[48],zero,zmm0[48],zero,zmm0[48],zero,zmm0[48],zero,zmm0[48],zero
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -5797,10 +5796,9 @@ define void @vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16(ptr %in
;
; AVX512BW-LABEL: vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
-; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
+; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
+; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0],zero,zero,zero,zmm0[0],zero,zero,zero,zmm0[0],zero,zero,zero,zmm0[0],zero,zero,zero,zmm0[16],zero,zero,zero,zmm0[16],zero,zero,zero,zmm0[16],zero,zero,zero,zmm0[16],zero,zero,zero,zmm0[32],zero,zero,zero,zmm0[32],zero,zero,zero,zmm0[32],zero,zero,zero,zmm0[32],zero,zero,zero,zmm0[48],zero,zero,zero,zmm0[48],zero,zero,zero,zmm0[48],zero,zero,zero,zmm0[48],zero,zero,zero
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -5910,10 +5908,9 @@ define void @vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8(ptr %in.v
;
; AVX512BW-LABEL: vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
-; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
+; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
+; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0],zero,zero,zero,zero,zero,zero,zero,zmm0[0],zero,zero,zero,zero,zero,zero,zero,zmm0[16],zero,zero,zero,zero,zero,zero,zero,zmm0[16],zero,zero,zero,zero,zero,zero,zero,zmm0[32],zero,zero,zero,zero,zero,zero,zero,zmm0[32],zero,zero,zero,zero,zero,zero,zero,zmm0[48],zero,zero,zero,zero,zero,zero,zero,zmm0[48],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -6004,10 +6001,9 @@ define void @vec512_i8_widen_to_i128_factor16_broadcast_to_v4i128_factor4(ptr %i
;
; AVX512BW-LABEL: vec512_i8_widen_to_i128_factor16_broadcast_to_v4i128_factor4:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
-; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
+; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
+; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -6211,12 +6207,12 @@ define void @vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16(ptr %i
;
; AVX512BW-LABEL: vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
-; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,1],zero,zero,zmm0[0,1],zero,zero,zmm0[0,1],zero,zero,zmm0[0,1],zero,zero,zmm0[16,17],zero,zero,zmm0[16,17],zero,zero,zmm0[16,17],zero,zero,zmm0[16,17],zero,zero,zmm0[32,33],zero,zero,zmm0[32,33],zero,zero,zmm0[32,33],zero,zero,zmm0[32,33],zero,zero,zmm0[48,49],zero,zero,zmm0[48,49],zero,zero,zmm0[48,49],zero,zero,zmm0[48,49],zero,zero
-; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
+; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
+; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
+; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,33,0,35,0,37,0,39,0,41,0,43,0,45,0,47,0,49,0,51,0,53,0,55,0,57,0,59,0,61,0,63]
+; AVX512BW-NEXT: vpermi2w %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT: vpaddb (%rdx), %zmm2, %zmm0
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@@ -6330,12 +6326,12 @@ define void @vec512_i16_widen_to_i64_factor4_broadcast_to_v8i64_factor8(ptr %in.
;
; AVX512BW-LABEL: vec512_i16_widen_to_i64_factor4_broadcast_to_v8i64_factor8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
-; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,1],zero,zero,zero,zero,zero,zero,zmm0[0,1],zero,zero,zero,zero,zero,zero,zmm0[16,17],zero,zero,zero,zero,zero,zero,zmm0[16,17],zero,zero,zero,zero,zero,zero,zmm0[32,33],zero,zero,zero,zero,zero,zero,zmm0[32,33],zero,zero,zero,zero,zero,zero,zmm0[48,49],zero,zero,zero,zero,zero,zero,zmm0[48,49],zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
+; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
+; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
+; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,33,34,35,0,37,38,39,0,41,42,43,0,45,46,47,0,49,50,51,0,53,54,55,0,57,58,59,0,61,62,63]
+; AVX512BW-NEXT: vpermi2w %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT: vpaddb (%rdx), %zmm2, %zmm0
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@@ -6449,12 +6445,12 @@ define void @vec512_i16_widen_to_i128_factor8_broadcast_to_v4i128_factor4(ptr %i
;
; AVX512BW-LABEL: vec512_i16_widen_to_i128_factor8_broadcast_to_v4i128_factor4:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
-; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
-; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
+; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
+; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
+; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,33,34,35,36,37,38,39,0,41,42,43,44,45,46,47,0,49,50,51,52,53,54,55,0,57,58,59,60,61,62,63]
+; AVX512BW-NEXT: vpermi2w %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT: vpaddb (%rdx), %zmm2, %zmm0
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/hwasan.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/hwasan.ll
new file mode 100644
index 0000000..c4b209d
--- /dev/null
+++ b/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/hwasan.ll
@@ -0,0 +1,35 @@
+; RUN: opt %s -S -passes=declare-to-assign -o - | FileCheck %s
+
+; CHECK: call void @llvm.dbg.declare
+
+define dso_local void @f() sanitize_hwaddress !dbg !9 {
+entry:
+ %a = alloca i32, align 4
+ call void @llvm.dbg.declare(metadata ptr %a, metadata !13, metadata !DIExpression()), !dbg !16
+ ret void, !dbg !17
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5, !6, !7}
+!llvm.ident = !{!8}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 17.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "test.c", directory: "/")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{i32 8, !"PIC Level", i32 2}
+!6 = !{i32 7, !"PIE Level", i32 2}
+!7 = !{i32 7, !"uwtable", i32 2}
+!8 = !{!"clang version 17.0.0"}
+!9 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !12)
+!10 = !DISubroutineType(types: !11)
+!11 = !{null}
+!12 = !{!13}
+!13 = !DILocalVariable(name: "a", scope: !9, file: !1, line: 1, type: !14)
+!14 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!15 = !DILocation(line: 1, column: 12, scope: !9)
+!16 = !DILocation(line: 1, column: 16, scope: !9)
+!17 = !DILocation(line: 1, column: 19, scope: !9)
diff --git a/llvm/test/Examples/OrcV2Examples/Inputs/argc_sub1.ll b/llvm/test/Examples/OrcV2Examples/Inputs/argc_sub1.ll
new file mode 100644
index 0000000..9731771
--- /dev/null
+++ b/llvm/test/Examples/OrcV2Examples/Inputs/argc_sub1.ll
@@ -0,0 +1,16 @@
+define i32 @sub1(i32 %0) {
+ %2 = add i32 %0, -1
+ ret i32 %2
+}
+
+define i32 @main(i32 %0) {
+ %2 = call i32 @sub1(i32 %0)
+ ret i32 %2
+}
+
+!llvm.module.flags = !{!0}
+!llvm.dbg.cu = !{!1}
+
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang 18.0.0git", emissionKind: FullDebug)
+!2 = !DIFile(filename: "argc_sub1.c", directory: ".")
diff --git a/llvm/test/Examples/OrcV2Examples/Inputs/argc_sub1_elf.ll b/llvm/test/Examples/OrcV2Examples/Inputs/argc_sub1_elf.ll
deleted file mode 100644
index 0cdc5e7..0000000
--- a/llvm/test/Examples/OrcV2Examples/Inputs/argc_sub1_elf.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; ModuleID = 'argc_sub1.c'
-
-define i32 @sub1(i32) !dbg !8 {
- call void @llvm.dbg.value(metadata i32 %0, metadata !13, metadata !DIExpression()), !dbg !14
- %2 = add nsw i32 %0, -1, !dbg !15
- ret i32 %2, !dbg !16
-}
-
-define i32 @main(i32, i8** nocapture readnone) !dbg !17 {
- call void @llvm.dbg.value(metadata i32 %0, metadata !24, metadata !DIExpression()), !dbg !26
- call void @llvm.dbg.value(metadata i8** %1, metadata !25, metadata !DIExpression()), !dbg !27
- %3 = tail call i32 @sub1(i32 %0), !dbg !28
- ret i32 %3, !dbg !29
-}
-
-declare void @llvm.dbg.value(metadata, metadata, metadata)
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!3, !4, !5, !6}
-!llvm.ident = !{!7}
-
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 7.0.1-8+deb10u2 (tags/RELEASE_701/final)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
-!1 = !DIFile(filename: "argc_sub1.c", directory: "Inputs/")
-!2 = !{}
-!3 = !{i32 2, !"Dwarf Version", i32 4}
-!4 = !{i32 2, !"Debug Info Version", i32 3}
-!5 = !{i32 1, !"wchar_size", i32 4}
-!6 = !{i32 7, !"PIC Level", i32 2}
-!7 = !{!"clang version 7.0.1-8+deb10u2 (tags/RELEASE_701/final)"}
-!8 = distinct !DISubprogram(name: "sub1", scope: !1, file: !1, line: 1, type: !9, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12)
-!9 = !DISubroutineType(types: !10)
-!10 = !{!11, !11}
-!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
-!12 = !{!13}
-!13 = !DILocalVariable(name: "x", arg: 1, scope: !8, file: !1, line: 1, type: !11)
-!14 = !DILocation(line: 1, column: 14, scope: !8)
-!15 = !DILocation(line: 1, column: 28, scope: !8)
-!16 = !DILocation(line: 1, column: 19, scope: !8)
-!17 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 2, type: !18, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !23)
-!18 = !DISubroutineType(types: !19)
-!19 = !{!11, !11, !20}
-!20 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !21, size: 64)
-!21 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !22, size: 64)
-!22 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
-!23 = !{!24, !25}
-!24 = !DILocalVariable(name: "argc", arg: 1, scope: !17, file: !1, line: 2, type: !11)
-!25 = !DILocalVariable(name: "argv", arg: 2, scope: !17, file: !1, line: 2, type: !20)
-!26 = !DILocation(line: 2, column: 14, scope: !17)
-!27 = !DILocation(line: 2, column: 27, scope: !17)
-!28 = !DILocation(line: 2, column: 42, scope: !17)
-!29 = !DILocation(line: 2, column: 35, scope: !17)
diff --git a/llvm/test/Examples/OrcV2Examples/lljit-with-remote-debugging.test b/llvm/test/Examples/OrcV2Examples/lljit-with-remote-debugging.test
index b0b3350..83dbf62 100644
--- a/llvm/test/Examples/OrcV2Examples/lljit-with-remote-debugging.test
+++ b/llvm/test/Examples/OrcV2Examples/lljit-with-remote-debugging.test
@@ -1,15 +1,21 @@
-# This test makes sure that the example builds and executes as expected.
+# Check that the debug support plugin appends a jit_code_entry to the
+# jit_descriptor of the child process.
+
# Instructions for debugging can be found in LLJITWithRemoteDebugging.cpp
# REQUIRES: default_triple
# UNSUPPORTED: target=powerpc64{{.*}}
-# RUN: LLJITWithRemoteDebugging %p/Inputs/argc_sub1_elf.ll | FileCheck --check-prefix=CHECK0 %s
-# CHECK0: Parsing input IR code from: {{.*}}/Inputs/argc_sub1_elf.ll
+# RUN: LLJITWithRemoteDebugging %p/Inputs/argc_sub1.ll 2>&1 | FileCheck --check-prefix=CHECK0 %s
+# CHECK0: __jit_debug_descriptor.last_entry = [[BEFORE0:0x[0-9a-f]+]]
+# CHECK0-NOT: __jit_debug_descriptor.last_entry = [[BEFORE0]]
+# CHECK0: Parsing input IR code from: {{.*}}/Inputs/argc_sub1.ll
# CHECK0: Running: main()
# CHECK0: Exit code: 0
-# RUN: LLJITWithRemoteDebugging %p/Inputs/argc_sub1_elf.ll --args 2nd 3rd 4th | FileCheck --check-prefix=CHECK3 %s
-# CHECK3: Parsing input IR code from: {{.*}}/Inputs/argc_sub1_elf.ll
+# RUN: LLJITWithRemoteDebugging %p/Inputs/argc_sub1.ll --args 2nd 3rd 4th 2>&1 | FileCheck --check-prefix=CHECK3 %s
+# CHECK3: __jit_debug_descriptor.last_entry = [[BEFORE3:0x[0-9a-f]+]]
+# CHECK3-NOT: __jit_debug_descriptor.last_entry = [[BEFORE3]]
+# CHECK3: Parsing input IR code from: {{.*}}/Inputs/argc_sub1.ll
# CHECK3: Running: main("2nd", "3rd", "4th")
# CHECK3: Exit code: 3
diff --git a/llvm/test/MC/AArch64/FP8/dot.s b/llvm/test/MC/AArch64/FP8/dot.s
index e755430..cfcc113 100644
--- a/llvm/test/MC/AArch64/FP8/dot.s
+++ b/llvm/test/MC/AArch64/FP8/dot.s
@@ -44,6 +44,12 @@ fdot v31.4h, v31.8b, v15.2b[0]
// CHECK-ERROR: instruction requires: fp8dot2
// CHECK-UNKNOWN: 0f4f03ff <unknown>
+fdot v26.8H, v22.16B, v9.2B[0]
+// CHECK-INST: fdot v26.8h, v22.16b, v9.2b[0]
+// CHECK-ENCODING: [0xda,0x02,0x49,0x4f]
+// CHECK-ERROR: instruction requires: fp8dot2
+// CHECK-UNKNOWN: 4f4902da <unknown>
+
fdot v0.8h, v0.16b, v15.2b[7]
// CHECK-INST: fdot v0.8h, v0.16b, v15.2b[7]
// CHECK-ENCODING: [0x00,0x08,0x7f,0x4f]
diff --git a/llvm/test/MC/AArch64/SME2p1/bfadd-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfadd-diagnostics.s
index bc9a112..cadb470 100644
--- a/llvm/test/MC/AArch64/SME2p1/bfadd-diagnostics.s
+++ b/llvm/test/MC/AArch64/SME2/bfadd-diagnostics.s
@@ -1,4 +1,4 @@
-// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 2>&1 < %s | FileCheck %s
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 2>&1 < %s | FileCheck %s
// --------------------------------------------------------------------------//
// Out of range index offset
diff --git a/llvm/test/MC/AArch64/SME2p1/bfadd.s b/llvm/test/MC/AArch64/SME2/bfadd.s
index 0fb553d..0ba009a 100644
--- a/llvm/test/MC/AArch64/SME2p1/bfadd.s
+++ b/llvm/test/MC/AArch64/SME2/bfadd.s
@@ -1,300 +1,300 @@
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \
-// RUN: | llvm-objdump -d --mattr=+sme2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \
-// RUN: | llvm-objdump -d --mattr=-sme2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN: | llvm-objdump -d --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
-// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p1,+b16b16 -disassemble -show-encoding \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
bfadd za.h[w8, 0, vgx2], {z0.h, z1.h} // 11000001-11100100-00011100-00000000
// CHECK-INST: bfadd za.h[w8, 0, vgx2], { z0.h, z1.h }
// CHECK-ENCODING: [0x00,0x1c,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e41c00 <unknown>
bfadd za.h[w8, 0], {z0.h - z1.h} // 11000001-11100100-00011100-00000000
// CHECK-INST: bfadd za.h[w8, 0, vgx2], { z0.h, z1.h }
// CHECK-ENCODING: [0x00,0x1c,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e41c00 <unknown>
bfadd za.h[w10, 5, vgx2], {z10.h, z11.h} // 11000001-11100100-01011101-01000101
// CHECK-INST: bfadd za.h[w10, 5, vgx2], { z10.h, z11.h }
// CHECK-ENCODING: [0x45,0x5d,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e45d45 <unknown>
bfadd za.h[w10, 5], {z10.h - z11.h} // 11000001-11100100-01011101-01000101
// CHECK-INST: bfadd za.h[w10, 5, vgx2], { z10.h, z11.h }
// CHECK-ENCODING: [0x45,0x5d,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e45d45 <unknown>
bfadd za.h[w11, 7, vgx2], {z12.h, z13.h} // 11000001-11100100-01111101-10000111
// CHECK-INST: bfadd za.h[w11, 7, vgx2], { z12.h, z13.h }
// CHECK-ENCODING: [0x87,0x7d,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e47d87 <unknown>
bfadd za.h[w11, 7], {z12.h - z13.h} // 11000001-11100100-01111101-10000111
// CHECK-INST: bfadd za.h[w11, 7, vgx2], { z12.h, z13.h }
// CHECK-ENCODING: [0x87,0x7d,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e47d87 <unknown>
bfadd za.h[w11, 7, vgx2], {z30.h, z31.h} // 11000001-11100100-01111111-11000111
// CHECK-INST: bfadd za.h[w11, 7, vgx2], { z30.h, z31.h }
// CHECK-ENCODING: [0xc7,0x7f,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e47fc7 <unknown>
bfadd za.h[w11, 7], {z30.h - z31.h} // 11000001-11100100-01111111-11000111
// CHECK-INST: bfadd za.h[w11, 7, vgx2], { z30.h, z31.h }
// CHECK-ENCODING: [0xc7,0x7f,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e47fc7 <unknown>
bfadd za.h[w8, 5, vgx2], {z16.h, z17.h} // 11000001-11100100-00011110-00000101
// CHECK-INST: bfadd za.h[w8, 5, vgx2], { z16.h, z17.h }
// CHECK-ENCODING: [0x05,0x1e,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e41e05 <unknown>
bfadd za.h[w8, 5], {z16.h - z17.h} // 11000001-11100100-00011110-00000101
// CHECK-INST: bfadd za.h[w8, 5, vgx2], { z16.h, z17.h }
// CHECK-ENCODING: [0x05,0x1e,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e41e05 <unknown>
bfadd za.h[w8, 1, vgx2], {z0.h, z1.h} // 11000001-11100100-00011100-00000001
// CHECK-INST: bfadd za.h[w8, 1, vgx2], { z0.h, z1.h }
// CHECK-ENCODING: [0x01,0x1c,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e41c01 <unknown>
bfadd za.h[w8, 1], {z0.h - z1.h} // 11000001-11100100-00011100-00000001
// CHECK-INST: bfadd za.h[w8, 1, vgx2], { z0.h, z1.h }
// CHECK-ENCODING: [0x01,0x1c,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e41c01 <unknown>
bfadd za.h[w10, 0, vgx2], {z18.h, z19.h} // 11000001-11100100-01011110, 01000000
// CHECK-INST: bfadd za.h[w10, 0, vgx2], { z18.h, z19.h }
// CHECK-ENCODING: [0x40,0x5e,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e45e40 <unknown>
bfadd za.h[w10, 0], {z18.h - z19.h} // 11000001-11100100-01011110-01000000
// CHECK-INST: bfadd za.h[w10, 0, vgx2], { z18.h, z19.h }
// CHECK-ENCODING: [0x40,0x5e,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e45e40 <unknown>
bfadd za.h[w8, 0, vgx2], {z12.h, z13.h} // 11000001-11100100-00011101-10000000
// CHECK-INST: bfadd za.h[w8, 0, vgx2], { z12.h, z13.h }
// CHECK-ENCODING: [0x80,0x1d,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e41d80 <unknown>
bfadd za.h[w8, 0], {z12.h - z13.h} // 11000001-11100100-00011101-10000000
// CHECK-INST: bfadd za.h[w8, 0, vgx2], { z12.h, z13.h }
// CHECK-ENCODING: [0x80,0x1d,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e41d80 <unknown>
bfadd za.h[w10, 1, vgx2], {z0.h, z1.h} // 11000001-11100100-01011100-00000001
// CHECK-INST: bfadd za.h[w10, 1, vgx2], { z0.h, z1.h }
// CHECK-ENCODING: [0x01,0x5c,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e45c01 <unknown>
bfadd za.h[w10, 1], {z0.h - z1.h} // 11000001-11100100-01011100-00000001
// CHECK-INST: bfadd za.h[w10, 1, vgx2], { z0.h, z1.h }
// CHECK-ENCODING: [0x01,0x5c,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e45c01 <unknown>
bfadd za.h[w8, 5, vgx2], {z22.h, z23.h} // 11000001-11100100-00011110, 11000101
// CHECK-INST: bfadd za.h[w8, 5, vgx2], { z22.h, z23.h }
// CHECK-ENCODING: [0xc5,0x1e,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e41ec5 <unknown>
bfadd za.h[w8, 5], {z22.h - z23.h} // 11000001-11100100-00011110-11000101
// CHECK-INST: bfadd za.h[w8, 5, vgx2], { z22.h, z23.h }
// CHECK-ENCODING: [0xc5,0x1e,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e41ec5 <unknown>
bfadd za.h[w11, 2, vgx2], {z8.h, z9.h} // 11000001-11100100-01111101-00000010
// CHECK-INST: bfadd za.h[w11, 2, vgx2], { z8.h, z9.h }
// CHECK-ENCODING: [0x02,0x7d,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e47d02 <unknown>
bfadd za.h[w11, 2], {z8.h - z9.h} // 11000001-11100100-01111101-00000010
// CHECK-INST: bfadd za.h[w11, 2, vgx2], { z8.h, z9.h }
// CHECK-ENCODING: [0x02,0x7d,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e47d02 <unknown>
bfadd za.h[w9, 7, vgx2], {z12.h, z13.h} // 11000001-11100100-00111101-10000111
// CHECK-INST: bfadd za.h[w9, 7, vgx2], { z12.h, z13.h }
// CHECK-ENCODING: [0x87,0x3d,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e43d87 <unknown>
bfadd za.h[w9, 7], {z12.h - z13.h} // 11000001-11100100-00111101-10000111
// CHECK-INST: bfadd za.h[w9, 7, vgx2], { z12.h, z13.h }
// CHECK-ENCODING: [0x87,0x3d,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e43d87 <unknown>
bfadd za.h[w8, 0, vgx4], {z0.h - z3.h} // 11000001-11100101-00011100-00000000
// CHECK-INST: bfadd za.h[w8, 0, vgx4], { z0.h - z3.h }
// CHECK-ENCODING: [0x00,0x1c,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e51c00 <unknown>
bfadd za.h[w8, 0], {z0.h - z3.h} // 11000001-11100101-00011100-00000000
// CHECK-INST: bfadd za.h[w8, 0, vgx4], { z0.h - z3.h }
// CHECK-ENCODING: [0x00,0x1c,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e51c00 <unknown>
bfadd za.h[w10, 5, vgx4], {z8.h - z11.h} // 11000001-11100101-01011101-00000101
// CHECK-INST: bfadd za.h[w10, 5, vgx4], { z8.h - z11.h }
// CHECK-ENCODING: [0x05,0x5d,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e55d05 <unknown>
bfadd za.h[w10, 5], {z8.h - z11.h} // 11000001-11100101-01011101-00000101
// CHECK-INST: bfadd za.h[w10, 5, vgx4], { z8.h - z11.h }
// CHECK-ENCODING: [0x05,0x5d,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e55d05 <unknown>
bfadd za.h[w11, 7, vgx4], {z12.h - z15.h} // 11000001-11100101-01111101-10000111
// CHECK-INST: bfadd za.h[w11, 7, vgx4], { z12.h - z15.h }
// CHECK-ENCODING: [0x87,0x7d,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e57d87 <unknown>
bfadd za.h[w11, 7], {z12.h - z15.h} // 11000001-11100101-01111101-10000111
// CHECK-INST: bfadd za.h[w11, 7, vgx4], { z12.h - z15.h }
// CHECK-ENCODING: [0x87,0x7d,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e57d87 <unknown>
bfadd za.h[w11, 7, vgx4], {z28.h - z31.h} // 11000001-11100101-01111111-10000111
// CHECK-INST: bfadd za.h[w11, 7, vgx4], { z28.h - z31.h }
// CHECK-ENCODING: [0x87,0x7f,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e57f87 <unknown>
bfadd za.h[w11, 7], {z28.h - z31.h} // 11000001-11100101-01111111-10000111
// CHECK-INST: bfadd za.h[w11, 7, vgx4], { z28.h - z31.h }
// CHECK-ENCODING: [0x87,0x7f,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e57f87 <unknown>
bfadd za.h[w8, 5, vgx4], {z16.h - z19.h} // 11000001-11100101-00011110-00000101
// CHECK-INST: bfadd za.h[w8, 5, vgx4], { z16.h - z19.h }
// CHECK-ENCODING: [0x05,0x1e,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e51e05 <unknown>
bfadd za.h[w8, 5], {z16.h - z19.h} // 11000001-11100101-00011110-00000101
// CHECK-INST: bfadd za.h[w8, 5, vgx4], { z16.h - z19.h }
// CHECK-ENCODING: [0x05,0x1e,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e51e05 <unknown>
bfadd za.h[w8, 1, vgx4], {z0.h - z3.h} // 11000001-11100101-00011100-00000001
// CHECK-INST: bfadd za.h[w8, 1, vgx4], { z0.h - z3.h }
// CHECK-ENCODING: [0x01,0x1c,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e51c01 <unknown>
bfadd za.h[w8, 1], {z0.h - z3.h} // 11000001-11100101-00011100-00000001
// CHECK-INST: bfadd za.h[w8, 1, vgx4], { z0.h - z3.h }
// CHECK-ENCODING: [0x01,0x1c,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e51c01 <unknown>
bfadd za.h[w10, 0, vgx4], {z16.h - z19.h} // 11000001-11100101-01011110-00000000
// CHECK-INST: bfadd za.h[w10, 0, vgx4], { z16.h - z19.h }
// CHECK-ENCODING: [0x00,0x5e,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e55e00 <unknown>
bfadd za.h[w10, 0], {z16.h - z19.h} // 11000001-11100101-01011110-00000000
// CHECK-INST: bfadd za.h[w10, 0, vgx4], { z16.h - z19.h }
// CHECK-ENCODING: [0x00,0x5e,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e55e00 <unknown>
bfadd za.h[w8, 0, vgx4], {z12.h - z15.h} // 11000001-11100101-00011101-10000000
// CHECK-INST: bfadd za.h[w8, 0, vgx4], { z12.h - z15.h }
// CHECK-ENCODING: [0x80,0x1d,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e51d80 <unknown>
bfadd za.h[w8, 0], {z12.h - z15.h} // 11000001-11100101-00011101-10000000
// CHECK-INST: bfadd za.h[w8, 0, vgx4], { z12.h - z15.h }
// CHECK-ENCODING: [0x80,0x1d,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e51d80 <unknown>
bfadd za.h[w10, 1, vgx4], {z0.h - z3.h} // 11000001-11100101-01011100-00000001
// CHECK-INST: bfadd za.h[w10, 1, vgx4], { z0.h - z3.h }
// CHECK-ENCODING: [0x01,0x5c,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e55c01 <unknown>
bfadd za.h[w10, 1], {z0.h - z3.h} // 11000001-11100101-01011100-00000001
// CHECK-INST: bfadd za.h[w10, 1, vgx4], { z0.h - z3.h }
// CHECK-ENCODING: [0x01,0x5c,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e55c01 <unknown>
bfadd za.h[w8, 5, vgx4], {z20.h - z23.h} // 11000001-11100101-00011110-10000101
// CHECK-INST: bfadd za.h[w8, 5, vgx4], { z20.h - z23.h }
// CHECK-ENCODING: [0x85,0x1e,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e51e85 <unknown>
bfadd za.h[w8, 5], {z20.h - z23.h} // 11000001-11100101-00011110-10000101
// CHECK-INST: bfadd za.h[w8, 5, vgx4], { z20.h - z23.h }
// CHECK-ENCODING: [0x85,0x1e,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e51e85 <unknown>
bfadd za.h[w11, 2, vgx4], {z8.h - z11.h} // 11000001-11100101-01111101-00000010
// CHECK-INST: bfadd za.h[w11, 2, vgx4], { z8.h - z11.h }
// CHECK-ENCODING: [0x02,0x7d,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e57d02 <unknown>
bfadd za.h[w11, 2], {z8.h - z11.h} // 11000001-11100101-01111101-00000010
// CHECK-INST: bfadd za.h[w11, 2, vgx4], { z8.h - z11.h }
// CHECK-ENCODING: [0x02,0x7d,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e57d02 <unknown>
bfadd za.h[w9, 7, vgx4], {z12.h - z15.h} // 11000001-11100101-00111101-10000111
// CHECK-INST: bfadd za.h[w9, 7, vgx4], { z12.h - z15.h }
// CHECK-ENCODING: [0x87,0x3d,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e53d87 <unknown>
bfadd za.h[w9, 7], {z12.h - z15.h} // 11000001-11100101-00111101-10000111
// CHECK-INST: bfadd za.h[w9, 7, vgx4], { z12.h - z15.h }
// CHECK-ENCODING: [0x87,0x3d,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e53d87 <unknown>
diff --git a/llvm/test/MC/AArch64/SME2p1/bfclamp-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfclamp-diagnostics.s
index ee48a21..661cfad 100644
--- a/llvm/test/MC/AArch64/SME2p1/bfclamp-diagnostics.s
+++ b/llvm/test/MC/AArch64/SME2/bfclamp-diagnostics.s
@@ -1,4 +1,4 @@
-// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 2>&1 < %s | FileCheck %s
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 2>&1 < %s | FileCheck %s
// --------------------------------------------------------------------------//
// Invalid vector list
diff --git a/llvm/test/MC/AArch64/SME2p1/bfclamp.s b/llvm/test/MC/AArch64/SME2/bfclamp.s
index ebf8500..dc3caec 100644
--- a/llvm/test/MC/AArch64/SME2p1/bfclamp.s
+++ b/llvm/test/MC/AArch64/SME2/bfclamp.s
@@ -1,60 +1,60 @@
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \
-// RUN: | llvm-objdump -d --mattr=+sme2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \
-// RUN: | llvm-objdump -d --mattr=-sme2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN: | llvm-objdump -d --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
-// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p1,+b16b16 -disassemble -show-encoding \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
bfclamp {z0.h, z1.h}, z0.h, z0.h // 11000001-00100000-11000000-00000000
// CHECK-INST: bfclamp { z0.h, z1.h }, z0.h, z0.h
// CHECK-ENCODING: [0x00,0xc0,0x20,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c120c000 <unknown>
bfclamp {z20.h, z21.h}, z10.h, z21.h // 11000001-00110101-11000001-01010100
// CHECK-INST: bfclamp { z20.h, z21.h }, z10.h, z21.h
// CHECK-ENCODING: [0x54,0xc1,0x35,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c135c154 <unknown>
bfclamp {z22.h, z23.h}, z13.h, z8.h // 11000001-00101000-11000001-10110110
// CHECK-INST: bfclamp { z22.h, z23.h }, z13.h, z8.h
// CHECK-ENCODING: [0xb6,0xc1,0x28,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c128c1b6 <unknown>
bfclamp {z30.h, z31.h}, z31.h, z31.h // 11000001-00111111-11000011-11111110
// CHECK-INST: bfclamp { z30.h, z31.h }, z31.h, z31.h
// CHECK-ENCODING: [0xfe,0xc3,0x3f,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c13fc3fe <unknown>
bfclamp {z0.h - z3.h}, z0.h, z0.h // 11000001-00100000-11001000-00000000
// CHECK-INST: bfclamp { z0.h - z3.h }, z0.h, z0.h
// CHECK-ENCODING: [0x00,0xc8,0x20,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c120c800 <unknown>
bfclamp {z20.h - z23.h}, z10.h, z21.h // 11000001-00110101-11001001-01010100
// CHECK-INST: bfclamp { z20.h - z23.h }, z10.h, z21.h
// CHECK-ENCODING: [0x54,0xc9,0x35,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c135c954 <unknown>
bfclamp {z20.h - z23.h}, z13.h, z8.h // 11000001-00101000-11001001-10110100
// CHECK-INST: bfclamp { z20.h - z23.h }, z13.h, z8.h
// CHECK-ENCODING: [0xb4,0xc9,0x28,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c128c9b4 <unknown>
bfclamp {z28.h - z31.h}, z31.h, z31.h // 11000001-00111111-11001011-11111100
// CHECK-INST: bfclamp { z28.h - z31.h }, z31.h, z31.h
// CHECK-ENCODING: [0xfc,0xcb,0x3f,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c13fcbfc <unknown>
diff --git a/llvm/test/MC/AArch64/SME2p1/bfmax-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfmax-diagnostics.s
index 55e5755..bbb619e 100644
--- a/llvm/test/MC/AArch64/SME2p1/bfmax-diagnostics.s
+++ b/llvm/test/MC/AArch64/SME2/bfmax-diagnostics.s
@@ -1,4 +1,4 @@
-// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 2>&1 < %s | FileCheck %s
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 2>&1 < %s | FileCheck %s
// --------------------------------------------------------------------------//
// Invalid vector list
diff --git a/llvm/test/MC/AArch64/SME2p1/bfmax.s b/llvm/test/MC/AArch64/SME2/bfmax.s
index d5af905..657fcbc 100644
--- a/llvm/test/MC/AArch64/SME2p1/bfmax.s
+++ b/llvm/test/MC/AArch64/SME2/bfmax.s
@@ -1,108 +1,108 @@
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \
-// RUN: | llvm-objdump -d --mattr=-sme2p1 --mattr=+sme2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \
-// RUN: | llvm-objdump -d --mattr=-sme2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN: | llvm-objdump -d --mattr=-sme2 --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
-// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p1,+b16b16 -disassemble -show-encoding \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
bfmax {z0.h, z1.h}, {z0.h, z1.h}, z0.h // 11000001-00100000-10100001-00000000
// CHECK-INST: bfmax { z0.h, z1.h }, { z0.h, z1.h }, z0.h
// CHECK-ENCODING: [0x00,0xa1,0x20,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c120a100 <unknown>
bfmax {z20.h, z21.h}, {z20.h, z21.h}, z5.h // 11000001-00100101-10100001-00010100
// CHECK-INST: bfmax { z20.h, z21.h }, { z20.h, z21.h }, z5.h
// CHECK-ENCODING: [0x14,0xa1,0x25,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c125a114 <unknown>
bfmax {z22.h, z23.h}, {z22.h, z23.h}, z8.h // 11000001-00101000-10100001-00010110
// CHECK-INST: bfmax { z22.h, z23.h }, { z22.h, z23.h }, z8.h
// CHECK-ENCODING: [0x16,0xa1,0x28,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c128a116 <unknown>
bfmax {z30.h, z31.h}, {z30.h, z31.h}, z15.h // 11000001-00101111-10100001-00011110
// CHECK-INST: bfmax { z30.h, z31.h }, { z30.h, z31.h }, z15.h
// CHECK-ENCODING: [0x1e,0xa1,0x2f,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c12fa11e <unknown>
bfmax {z0.h, z1.h}, {z0.h, z1.h}, {z0.h, z1.h} // 11000001-00100000-10110001-00000000
// CHECK-INST: bfmax { z0.h, z1.h }, { z0.h, z1.h }, { z0.h, z1.h }
// CHECK-ENCODING: [0x00,0xb1,0x20,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c120b100 <unknown>
bfmax {z20.h, z21.h}, {z20.h, z21.h}, {z20.h, z21.h} // 11000001-00110100-10110001-00010100
// CHECK-INST: bfmax { z20.h, z21.h }, { z20.h, z21.h }, { z20.h, z21.h }
// CHECK-ENCODING: [0x14,0xb1,0x34,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c134b114 <unknown>
bfmax {z22.h, z23.h}, {z22.h, z23.h}, {z8.h, z9.h} // 11000001-00101000-10110001-00010110
// CHECK-INST: bfmax { z22.h, z23.h }, { z22.h, z23.h }, { z8.h, z9.h }
// CHECK-ENCODING: [0x16,0xb1,0x28,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c128b116 <unknown>
bfmax {z30.h, z31.h}, {z30.h, z31.h}, {z30.h, z31.h} // 11000001-00111110-10110001-00011110
// CHECK-INST: bfmax { z30.h, z31.h }, { z30.h, z31.h }, { z30.h, z31.h }
// CHECK-ENCODING: [0x1e,0xb1,0x3e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c13eb11e <unknown>
bfmax {z0.h - z3.h}, {z0.h - z3.h}, z0.h // 11000001-00100000-10101001-00000000
// CHECK-INST: bfmax { z0.h - z3.h }, { z0.h - z3.h }, z0.h
// CHECK-ENCODING: [0x00,0xa9,0x20,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c120a900 <unknown>
bfmax {z20.h - z23.h}, {z20.h - z23.h}, z5.h // 11000001-00100101-10101001-00010100
// CHECK-INST: bfmax { z20.h - z23.h }, { z20.h - z23.h }, z5.h
// CHECK-ENCODING: [0x14,0xa9,0x25,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c125a914 <unknown>
bfmax {z20.h - z23.h}, {z20.h - z23.h}, z8.h // 11000001-00101000-10101001-00010100
// CHECK-INST: bfmax { z20.h - z23.h }, { z20.h - z23.h }, z8.h
// CHECK-ENCODING: [0x14,0xa9,0x28,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c128a914 <unknown>
bfmax {z28.h - z31.h}, {z28.h - z31.h}, z15.h // 11000001-00101111-10101001-00011100
// CHECK-INST: bfmax { z28.h - z31.h }, { z28.h - z31.h }, z15.h
// CHECK-ENCODING: [0x1c,0xa9,0x2f,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c12fa91c <unknown>
bfmax {z0.h - z3.h}, {z0.h - z3.h}, {z0.h - z3.h} // 11000001-00100000-10111001-00000000
// CHECK-INST: bfmax { z0.h - z3.h }, { z0.h - z3.h }, { z0.h - z3.h }
// CHECK-ENCODING: [0x00,0xb9,0x20,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c120b900 <unknown>
bfmax {z20.h - z23.h}, {z20.h - z23.h}, {z20.h - z23.h} // 11000001-00110100-10111001-00010100
// CHECK-INST: bfmax { z20.h - z23.h }, { z20.h - z23.h }, { z20.h - z23.h }
// CHECK-ENCODING: [0x14,0xb9,0x34,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c134b914 <unknown>
bfmax {z20.h - z23.h}, {z20.h - z23.h}, {z8.h - z11.h} // 11000001-00101000-10111001-00010100
// CHECK-INST: bfmax { z20.h - z23.h }, { z20.h - z23.h }, { z8.h - z11.h }
// CHECK-ENCODING: [0x14,0xb9,0x28,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c128b914 <unknown>
bfmax {z28.h - z31.h}, {z28.h - z31.h}, {z28.h - z31.h} // 11000001-00111100-10111001-00011100
// CHECK-INST: bfmax { z28.h - z31.h }, { z28.h - z31.h }, { z28.h - z31.h }
// CHECK-ENCODING: [0x1c,0xb9,0x3c,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c13cb91c <unknown>
diff --git a/llvm/test/MC/AArch64/SME2p1/bfmaxnm-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfmaxnm-diagnostics.s
index b1f1511..ab837b6 100644
--- a/llvm/test/MC/AArch64/SME2p1/bfmaxnm-diagnostics.s
+++ b/llvm/test/MC/AArch64/SME2/bfmaxnm-diagnostics.s
@@ -1,4 +1,4 @@
-// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 2>&1 < %s | FileCheck %s
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 2>&1 < %s | FileCheck %s
// --------------------------------------------------------------------------//
// Invalid vector list
diff --git a/llvm/test/MC/AArch64/SME2p1/bfmaxnm.s b/llvm/test/MC/AArch64/SME2/bfmaxnm.s
index e5b97e8..f61f530 100644
--- a/llvm/test/MC/AArch64/SME2p1/bfmaxnm.s
+++ b/llvm/test/MC/AArch64/SME2/bfmaxnm.s
@@ -1,108 +1,108 @@
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \
-// RUN: | llvm-objdump -d --mattr=-sme2p1 --mattr=+sme2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \
-// RUN: | llvm-objdump -d --mattr=-sme2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN: | llvm-objdump -d --mattr=-sme2 --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
-// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p1,+b16b16 -disassemble -show-encoding \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
bfmaxnm {z0.h, z1.h}, {z0.h, z1.h}, z0.h // 11000001-00100000-10100001-00100000
// CHECK-INST: bfmaxnm { z0.h, z1.h }, { z0.h, z1.h }, z0.h
// CHECK-ENCODING: [0x20,0xa1,0x20,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c120a120 <unknown>
bfmaxnm {z20.h, z21.h}, {z20.h, z21.h}, z5.h // 11000001-00100101-10100001-00110100
// CHECK-INST: bfmaxnm { z20.h, z21.h }, { z20.h, z21.h }, z5.h
// CHECK-ENCODING: [0x34,0xa1,0x25,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c125a134 <unknown>
bfmaxnm {z22.h, z23.h}, {z22.h, z23.h}, z8.h // 11000001-00101000-10100001-00110110
// CHECK-INST: bfmaxnm { z22.h, z23.h }, { z22.h, z23.h }, z8.h
// CHECK-ENCODING: [0x36,0xa1,0x28,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c128a136 <unknown>
bfmaxnm {z30.h, z31.h}, {z30.h, z31.h}, z15.h // 11000001-00101111-10100001-00111110
// CHECK-INST: bfmaxnm { z30.h, z31.h }, { z30.h, z31.h }, z15.h
// CHECK-ENCODING: [0x3e,0xa1,0x2f,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c12fa13e <unknown>
bfmaxnm {z0.h, z1.h}, {z0.h, z1.h}, {z0.h, z1.h} // 11000001-00100000-10110001-00100000
// CHECK-INST: bfmaxnm { z0.h, z1.h }, { z0.h, z1.h }, { z0.h, z1.h }
// CHECK-ENCODING: [0x20,0xb1,0x20,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c120b120 <unknown>
bfmaxnm {z20.h, z21.h}, {z20.h, z21.h}, {z20.h, z21.h} // 11000001-00110100-10110001-00110100
// CHECK-INST: bfmaxnm { z20.h, z21.h }, { z20.h, z21.h }, { z20.h, z21.h }
// CHECK-ENCODING: [0x34,0xb1,0x34,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c134b134 <unknown>
bfmaxnm {z22.h, z23.h}, {z22.h, z23.h}, {z8.h, z9.h} // 11000001-00101000-10110001-00110110
// CHECK-INST: bfmaxnm { z22.h, z23.h }, { z22.h, z23.h }, { z8.h, z9.h }
// CHECK-ENCODING: [0x36,0xb1,0x28,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c128b136 <unknown>
bfmaxnm {z30.h, z31.h}, {z30.h, z31.h}, {z30.h, z31.h} // 11000001-00111110-10110001-00111110
// CHECK-INST: bfmaxnm { z30.h, z31.h }, { z30.h, z31.h }, { z30.h, z31.h }
// CHECK-ENCODING: [0x3e,0xb1,0x3e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c13eb13e <unknown>
bfmaxnm {z0.h - z3.h}, {z0.h - z3.h}, z0.h // 11000001-00100000-10101001-00100000
// CHECK-INST: bfmaxnm { z0.h - z3.h }, { z0.h - z3.h }, z0.h
// CHECK-ENCODING: [0x20,0xa9,0x20,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c120a920 <unknown>
bfmaxnm {z20.h - z23.h}, {z20.h - z23.h}, z5.h // 11000001-00100101-10101001-00110100
// CHECK-INST: bfmaxnm { z20.h - z23.h }, { z20.h - z23.h }, z5.h
// CHECK-ENCODING: [0x34,0xa9,0x25,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c125a934 <unknown>
bfmaxnm {z20.h - z23.h}, {z20.h - z23.h}, z8.h // 11000001-00101000-10101001-00110100
// CHECK-INST: bfmaxnm { z20.h - z23.h }, { z20.h - z23.h }, z8.h
// CHECK-ENCODING: [0x34,0xa9,0x28,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c128a934 <unknown>
bfmaxnm {z28.h - z31.h}, {z28.h - z31.h}, z15.h // 11000001-00101111-10101001-00111100
// CHECK-INST: bfmaxnm { z28.h - z31.h }, { z28.h - z31.h }, z15.h
// CHECK-ENCODING: [0x3c,0xa9,0x2f,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c12fa93c <unknown>
bfmaxnm {z0.h - z3.h}, {z0.h - z3.h}, {z0.h - z3.h} // 11000001-00100000-10111001-00100000
// CHECK-INST: bfmaxnm { z0.h - z3.h }, { z0.h - z3.h }, { z0.h - z3.h }
// CHECK-ENCODING: [0x20,0xb9,0x20,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c120b920 <unknown>
bfmaxnm {z20.h - z23.h}, {z20.h - z23.h}, {z20.h - z23.h} // 11000001-00110100-10111001-00110100
// CHECK-INST: bfmaxnm { z20.h - z23.h }, { z20.h - z23.h }, { z20.h - z23.h }
// CHECK-ENCODING: [0x34,0xb9,0x34,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c134b934 <unknown>
bfmaxnm {z20.h - z23.h}, {z20.h - z23.h}, {z8.h - z11.h} // 11000001-00101000-10111001-00110100
// CHECK-INST: bfmaxnm { z20.h - z23.h }, { z20.h - z23.h }, { z8.h - z11.h }
// CHECK-ENCODING: [0x34,0xb9,0x28,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c128b934 <unknown>
bfmaxnm {z28.h - z31.h}, {z28.h - z31.h}, {z28.h - z31.h} // 11000001-00111100-10111001-00111100
// CHECK-INST: bfmaxnm { z28.h - z31.h }, { z28.h - z31.h }, { z28.h - z31.h }
// CHECK-ENCODING: [0x3c,0xb9,0x3c,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c13cb93c <unknown>
diff --git a/llvm/test/MC/AArch64/SME2p1/bfmin-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfmin-diagnostics.s
index 72ee818..41f1036 100644
--- a/llvm/test/MC/AArch64/SME2p1/bfmin-diagnostics.s
+++ b/llvm/test/MC/AArch64/SME2/bfmin-diagnostics.s
@@ -1,4 +1,4 @@
-// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 2>&1 < %s | FileCheck %s
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 2>&1 < %s | FileCheck %s
// --------------------------------------------------------------------------//
// Invalid vector list
diff --git a/llvm/test/MC/AArch64/SME2p1/bfmin.s b/llvm/test/MC/AArch64/SME2/bfmin.s
index 3ba2be5..6612e3c 100644
--- a/llvm/test/MC/AArch64/SME2p1/bfmin.s
+++ b/llvm/test/MC/AArch64/SME2/bfmin.s
@@ -1,108 +1,108 @@
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \
-// RUN: | llvm-objdump -d --mattr=-sme2p1 --mattr=+sme2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \
-// RUN: | llvm-objdump -d --mattr=-sme2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN: | llvm-objdump -d --mattr=-sme2 --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
-// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p1,+b16b16 -disassemble -show-encoding \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
bfmin {z0.h, z1.h}, {z0.h, z1.h}, z0.h // 11000001-00100000-10100001-00000001
// CHECK-INST: bfmin { z0.h, z1.h }, { z0.h, z1.h }, z0.h
// CHECK-ENCODING: [0x01,0xa1,0x20,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c120a101 <unknown>
bfmin {z20.h, z21.h}, {z20.h, z21.h}, z5.h // 11000001-00100101-10100001-00010101
// CHECK-INST: bfmin { z20.h, z21.h }, { z20.h, z21.h }, z5.h
// CHECK-ENCODING: [0x15,0xa1,0x25,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c125a115 <unknown>
bfmin {z22.h, z23.h}, {z22.h, z23.h}, z8.h // 11000001-00101000-10100001-00010111
// CHECK-INST: bfmin { z22.h, z23.h }, { z22.h, z23.h }, z8.h
// CHECK-ENCODING: [0x17,0xa1,0x28,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c128a117 <unknown>
bfmin {z30.h, z31.h}, {z30.h, z31.h}, z15.h // 11000001-00101111-10100001-00011111
// CHECK-INST: bfmin { z30.h, z31.h }, { z30.h, z31.h }, z15.h
// CHECK-ENCODING: [0x1f,0xa1,0x2f,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c12fa11f <unknown>
bfmin {z0.h, z1.h}, {z0.h, z1.h}, {z0.h, z1.h} // 11000001-00100000-10110001-00000001
// CHECK-INST: bfmin { z0.h, z1.h }, { z0.h, z1.h }, { z0.h, z1.h }
// CHECK-ENCODING: [0x01,0xb1,0x20,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c120b101 <unknown>
bfmin {z20.h, z21.h}, {z20.h, z21.h}, {z20.h, z21.h} // 11000001-00110100-10110001-00010101
// CHECK-INST: bfmin { z20.h, z21.h }, { z20.h, z21.h }, { z20.h, z21.h }
// CHECK-ENCODING: [0x15,0xb1,0x34,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c134b115 <unknown>
bfmin {z22.h, z23.h}, {z22.h, z23.h}, {z8.h, z9.h} // 11000001-00101000-10110001-00010111
// CHECK-INST: bfmin { z22.h, z23.h }, { z22.h, z23.h }, { z8.h, z9.h }
// CHECK-ENCODING: [0x17,0xb1,0x28,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c128b117 <unknown>
bfmin {z30.h, z31.h}, {z30.h, z31.h}, {z30.h, z31.h} // 11000001-00111110-10110001-00011111
// CHECK-INST: bfmin { z30.h, z31.h }, { z30.h, z31.h }, { z30.h, z31.h }
// CHECK-ENCODING: [0x1f,0xb1,0x3e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c13eb11f <unknown>
bfmin {z0.h - z3.h}, {z0.h - z3.h}, z0.h // 11000001-00100000-10101001-00000001
// CHECK-INST: bfmin { z0.h - z3.h }, { z0.h - z3.h }, z0.h
// CHECK-ENCODING: [0x01,0xa9,0x20,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c120a901 <unknown>
bfmin {z20.h - z23.h}, {z20.h - z23.h}, z5.h // 11000001-00100101-10101001-00010101
// CHECK-INST: bfmin { z20.h - z23.h }, { z20.h - z23.h }, z5.h
// CHECK-ENCODING: [0x15,0xa9,0x25,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c125a915 <unknown>
bfmin {z20.h - z23.h}, {z20.h - z23.h}, z8.h // 11000001-00101000-10101001-00010101
// CHECK-INST: bfmin { z20.h - z23.h }, { z20.h - z23.h }, z8.h
// CHECK-ENCODING: [0x15,0xa9,0x28,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c128a915 <unknown>
bfmin {z28.h - z31.h}, {z28.h - z31.h}, z15.h // 11000001-00101111-10101001-00011101
// CHECK-INST: bfmin { z28.h - z31.h }, { z28.h - z31.h }, z15.h
// CHECK-ENCODING: [0x1d,0xa9,0x2f,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c12fa91d <unknown>
bfmin {z0.h - z3.h}, {z0.h - z3.h}, {z0.h - z3.h} // 11000001-00100000-10111001-00000001
// CHECK-INST: bfmin { z0.h - z3.h }, { z0.h - z3.h }, { z0.h - z3.h }
// CHECK-ENCODING: [0x01,0xb9,0x20,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c120b901 <unknown>
bfmin {z20.h - z23.h}, {z20.h - z23.h}, {z20.h - z23.h} // 11000001-00110100-10111001-00010101
// CHECK-INST: bfmin { z20.h - z23.h }, { z20.h - z23.h }, { z20.h - z23.h }
// CHECK-ENCODING: [0x15,0xb9,0x34,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c134b915 <unknown>
bfmin {z20.h - z23.h}, {z20.h - z23.h}, {z8.h - z11.h} // 11000001-00101000-10111001-00010101
// CHECK-INST: bfmin { z20.h - z23.h }, { z20.h - z23.h }, { z8.h - z11.h }
// CHECK-ENCODING: [0x15,0xb9,0x28,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c128b915 <unknown>
bfmin {z28.h - z31.h}, {z28.h - z31.h}, {z28.h - z31.h} // 11000001-00111100-10111001-00011101
// CHECK-INST: bfmin { z28.h - z31.h }, { z28.h - z31.h }, { z28.h - z31.h }
// CHECK-ENCODING: [0x1d,0xb9,0x3c,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c13cb91d <unknown>
diff --git a/llvm/test/MC/AArch64/SME2p1/bfminnm-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfminnm-diagnostics.s
index 2d161bf..14485e9 100644
--- a/llvm/test/MC/AArch64/SME2p1/bfminnm-diagnostics.s
+++ b/llvm/test/MC/AArch64/SME2/bfminnm-diagnostics.s
@@ -1,4 +1,4 @@
-// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 2>&1 < %s | FileCheck %s
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 2>&1 < %s | FileCheck %s
// --------------------------------------------------------------------------//
// Invalid vector list
diff --git a/llvm/test/MC/AArch64/SME2p1/bfminnm.s b/llvm/test/MC/AArch64/SME2/bfminnm.s
index cfaa3c1..4a48a0d 100644
--- a/llvm/test/MC/AArch64/SME2p1/bfminnm.s
+++ b/llvm/test/MC/AArch64/SME2/bfminnm.s
@@ -1,113 +1,113 @@
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \
-// RUN: | llvm-objdump -d --mattr=-sme2p1 --mattr=+sme2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \
-// RUN: | llvm-objdump -d --mattr=-sme2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN: | llvm-objdump -d --mattr=-sme2 --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
-// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p1,+b16b16 -disassemble -show-encoding \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
bfminnm {z0.h, z1.h}, {z0.h, z1.h}, z0.h // 11000001-00100000-10100001-00100001
// CHECK-INST: bfminnm { z0.h, z1.h }, { z0.h, z1.h }, z0.h
// CHECK-ENCODING: [0x21,0xa1,0x20,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c120a121 <unknown>
bfminnm {z20.h, z21.h}, {z20.h, z21.h}, z5.h // 11000001-00100101-10100001-00110101
// CHECK-INST: bfminnm { z20.h, z21.h }, { z20.h, z21.h }, z5.h
// CHECK-ENCODING: [0x35,0xa1,0x25,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c125a135 <unknown>
bfminnm {z22.h, z23.h}, {z22.h, z23.h}, z8.h // 11000001-00101000-10100001-00110111
// CHECK-INST: bfminnm { z22.h, z23.h }, { z22.h, z23.h }, z8.h
// CHECK-ENCODING: [0x37,0xa1,0x28,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c128a137 <unknown>
bfminnm {z30.h, z31.h}, {z30.h, z31.h}, z15.h // 11000001-00101111-10100001-00111111
// CHECK-INST: bfminnm { z30.h, z31.h }, { z30.h, z31.h }, z15.h
// CHECK-ENCODING: [0x3f,0xa1,0x2f,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c12fa13f <unknown>
bfminnm {z0.h, z1.h}, {z0.h, z1.h}, {z0.h, z1.h} // 11000001-00100000-10110001-00100001
// CHECK-INST: bfminnm { z0.h, z1.h }, { z0.h, z1.h }, { z0.h, z1.h }
// CHECK-ENCODING: [0x21,0xb1,0x20,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c120b121 <unknown>
bfminnm {z20.h, z21.h}, {z20.h, z21.h}, {z20.h, z21.h} // 11000001-00110100-10110001-00110101
// CHECK-INST: bfminnm { z20.h, z21.h }, { z20.h, z21.h }, { z20.h, z21.h }
// CHECK-ENCODING: [0x35,0xb1,0x34,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c134b135 <unknown>
bfminnm {z22.h, z23.h}, {z22.h, z23.h}, {z8.h, z9.h} // 11000001-00101000-10110001-00110111
// CHECK-INST: bfminnm { z22.h, z23.h }, { z22.h, z23.h }, { z8.h, z9.h }
// CHECK-ENCODING: [0x37,0xb1,0x28,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c128b137 <unknown>
bfminnm {z30.h, z31.h}, {z30.h, z31.h}, {z30.h, z31.h} // 11000001-00111110-10110001-00111111
// CHECK-INST: bfminnm { z30.h, z31.h }, { z30.h, z31.h }, { z30.h, z31.h }
// CHECK-ENCODING: [0x3f,0xb1,0x3e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c13eb13f <unknown>
bfminnm {z0.h - z3.h}, {z0.h - z3.h}, z0.h // 11000001-00100000-10101001-00100001
// CHECK-INST: bfminnm { z0.h - z3.h }, { z0.h - z3.h }, z0.h
// CHECK-ENCODING: [0x21,0xa9,0x20,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c120a921 <unknown>
bfminnm {z20.h - z23.h}, {z20.h - z23.h}, z5.h // 11000001-00100101-10101001-00110101
// CHECK-INST: bfminnm { z20.h - z23.h }, { z20.h - z23.h }, z5.h
// CHECK-ENCODING: [0x35,0xa9,0x25,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c125a935 <unknown>
bfminnm {z20.h - z23.h}, {z20.h - z23.h}, z8.h // 11000001-00101000-10101001-00110101
// CHECK-INST: bfminnm { z20.h - z23.h }, { z20.h - z23.h }, z8.h
// CHECK-ENCODING: [0x35,0xa9,0x28,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c128a935 <unknown>
bfminnm {z28.h - z31.h}, {z28.h - z31.h}, z15.h // 11000001-00101111-10101001-00111101
// CHECK-INST: bfminnm { z28.h - z31.h }, { z28.h - z31.h }, z15.h
// CHECK-ENCODING: [0x3d,0xa9,0x2f,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c12fa93d <unknown>
bfminnm {z0.h - z3.h}, {z0.h - z3.h}, {z0.h - z3.h} // 11000001-00100000-10111001-00100001
// CHECK-INST: bfminnm { z0.h - z3.h }, { z0.h - z3.h }, { z0.h - z3.h }
// CHECK-ENCODING: [0x21,0xb9,0x20,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c120b921 <unknown>
bfminnm {z20.h - z23.h}, {z20.h - z23.h}, {z20.h - z23.h} // 11000001-00110100-10111001-00110101
// CHECK-INST: bfminnm { z20.h - z23.h }, { z20.h - z23.h }, { z20.h - z23.h }
// CHECK-ENCODING: [0x35,0xb9,0x34,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c134b935 <unknown>
bfminnm {z20.h - z23.h}, {z20.h - z23.h}, {z8.h - z11.h} // 11000001-00101000-10111001-00110101
// CHECK-INST: bfminnm { z20.h - z23.h }, { z20.h - z23.h }, { z8.h - z11.h }
// CHECK-ENCODING: [0x35,0xb9,0x28,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c128b935 <unknown>
bfminnm {z28.h - z31.h}, {z28.h - z31.h}, {z28.h - z31.h} // 11000001-00111100-10111001-00111101
// CHECK-INST: bfminnm { z28.h - z31.h }, { z28.h - z31.h }, { z28.h - z31.h }
// CHECK-ENCODING: [0x3d,0xb9,0x3c,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c13cb93d <unknown>
diff --git a/llvm/test/MC/AArch64/SME2p1/bfmla-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfmla-diagnostics.s
index 42bb35d..efd2624 100644
--- a/llvm/test/MC/AArch64/SME2p1/bfmla-diagnostics.s
+++ b/llvm/test/MC/AArch64/SME2/bfmla-diagnostics.s
@@ -1,4 +1,4 @@
-// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 2>&1 < %s | FileCheck %s
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 2>&1 < %s | FileCheck %s
// --------------------------------------------------------------------------//
// Invalid vector list
diff --git a/llvm/test/MC/AArch64/SME2p1/bfmla.s b/llvm/test/MC/AArch64/SME2/bfmla.s
index 4c053fe..75ccccc 100644
--- a/llvm/test/MC/AArch64/SME2p1/bfmla.s
+++ b/llvm/test/MC/AArch64/SME2/bfmla.s
@@ -1,876 +1,876 @@
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \
-// RUN: | llvm-objdump -d --mattr=+sme2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \
-// RUN: | llvm-objdump -d --mattr=-sme2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN: | llvm-objdump -d --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
-// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p1,+b16b16 -disassemble -show-encoding \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
bfmla za.h[w8, 0, vgx2], {z0.h, z1.h}, z0.h // 11000001-01100000-00011100-00000000
// CHECK-INST: bfmla za.h[w8, 0, vgx2], { z0.h, z1.h }, z0.h
// CHECK-ENCODING: [0x00,0x1c,0x60,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1601c00 <unknown>
bfmla za.h[w8, 0], {z0.h - z1.h}, z0.h // 11000001-01100000-00011100-00000000
// CHECK-INST: bfmla za.h[w8, 0, vgx2], { z0.h, z1.h }, z0.h
// CHECK-ENCODING: [0x00,0x1c,0x60,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1601c00 <unknown>
bfmla za.h[w10, 5, vgx2], {z10.h, z11.h}, z5.h // 11000001-01100101-01011101-01000101
// CHECK-INST: bfmla za.h[w10, 5, vgx2], { z10.h, z11.h }, z5.h
// CHECK-ENCODING: [0x45,0x5d,0x65,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1655d45 <unknown>
bfmla za.h[w10, 5], {z10.h - z11.h}, z5.h // 11000001-01100101-01011101-01000101
// CHECK-INST: bfmla za.h[w10, 5, vgx2], { z10.h, z11.h }, z5.h
// CHECK-ENCODING: [0x45,0x5d,0x65,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1655d45 <unknown>
bfmla za.h[w11, 7, vgx2], {z13.h, z14.h}, z8.h // 11000001-01101000-01111101-10100111
// CHECK-INST: bfmla za.h[w11, 7, vgx2], { z13.h, z14.h }, z8.h
// CHECK-ENCODING: [0xa7,0x7d,0x68,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1687da7 <unknown>
bfmla za.h[w11, 7], {z13.h - z14.h}, z8.h // 11000001-01101000-01111101-10100111
// CHECK-INST: bfmla za.h[w11, 7, vgx2], { z13.h, z14.h }, z8.h
// CHECK-ENCODING: [0xa7,0x7d,0x68,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1687da7 <unknown>
bfmla za.h[w11, 7, vgx2], {z31.h, z0.h}, z15.h // 11000001-01101111-01111111-11100111
// CHECK-INST: bfmla za.h[w11, 7, vgx2], { z31.h, z0.h }, z15.h
// CHECK-ENCODING: [0xe7,0x7f,0x6f,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c16f7fe7 <unknown>
bfmla za.h[w11, 7], {z31.h - z0.h}, z15.h // 11000001-01101111-01111111-11100111
// CHECK-INST: bfmla za.h[w11, 7, vgx2], { z31.h, z0.h }, z15.h
// CHECK-ENCODING: [0xe7,0x7f,0x6f,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c16f7fe7 <unknown>
bfmla za.h[w8, 5, vgx2], {z17.h, z18.h}, z0.h // 11000001-01100000-00011110-00100101
// CHECK-INST: bfmla za.h[w8, 5, vgx2], { z17.h, z18.h }, z0.h
// CHECK-ENCODING: [0x25,0x1e,0x60,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1601e25 <unknown>
bfmla za.h[w8, 5], {z17.h - z18.h}, z0.h // 11000001-01100000-00011110-00100101
// CHECK-INST: bfmla za.h[w8, 5, vgx2], { z17.h, z18.h }, z0.h
// CHECK-ENCODING: [0x25,0x1e,0x60,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1601e25 <unknown>
bfmla za.h[w8, 1, vgx2], {z1.h, z2.h}, z14.h // 11000001-01101110-00011100-00100001
// CHECK-INST: bfmla za.h[w8, 1, vgx2], { z1.h, z2.h }, z14.h
// CHECK-ENCODING: [0x21,0x1c,0x6e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c16e1c21 <unknown>
bfmla za.h[w8, 1], {z1.h - z2.h}, z14.h // 11000001-01101110-00011100-00100001
// CHECK-INST: bfmla za.h[w8, 1, vgx2], { z1.h, z2.h }, z14.h
// CHECK-ENCODING: [0x21,0x1c,0x6e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c16e1c21 <unknown>
bfmla za.h[w10, 0, vgx2], {z19.h, z20.h}, z4.h // 11000001-01100100-01011110-01100000
// CHECK-INST: bfmla za.h[w10, 0, vgx2], { z19.h, z20.h }, z4.h
// CHECK-ENCODING: [0x60,0x5e,0x64,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1645e60 <unknown>
bfmla za.h[w10, 0], {z19.h - z20.h}, z4.h // 11000001-01100100-01011110-01100000
// CHECK-INST: bfmla za.h[w10, 0, vgx2], { z19.h, z20.h }, z4.h
// CHECK-ENCODING: [0x60,0x5e,0x64,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1645e60 <unknown>
bfmla za.h[w8, 0, vgx2], {z12.h, z13.h}, z2.h // 11000001-01100010-00011101-10000000
// CHECK-INST: bfmla za.h[w8, 0, vgx2], { z12.h, z13.h }, z2.h
// CHECK-ENCODING: [0x80,0x1d,0x62,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1621d80 <unknown>
bfmla za.h[w8, 0], {z12.h - z13.h}, z2.h // 11000001-01100010-00011101-10000000
// CHECK-INST: bfmla za.h[w8, 0, vgx2], { z12.h, z13.h }, z2.h
// CHECK-ENCODING: [0x80,0x1d,0x62,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1621d80 <unknown>
bfmla za.h[w10, 1, vgx2], {z1.h, z2.h}, z10.h // 11000001-01101010-01011100-00100001
// CHECK-INST: bfmla za.h[w10, 1, vgx2], { z1.h, z2.h }, z10.h
// CHECK-ENCODING: [0x21,0x5c,0x6a,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c16a5c21 <unknown>
bfmla za.h[w10, 1], {z1.h - z2.h}, z10.h // 11000001-01101010-01011100-00100001
// CHECK-INST: bfmla za.h[w10, 1, vgx2], { z1.h, z2.h }, z10.h
// CHECK-ENCODING: [0x21,0x5c,0x6a,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c16a5c21 <unknown>
bfmla za.h[w8, 5, vgx2], {z22.h, z23.h}, z14.h // 11000001-01101110-00011110-11000101
// CHECK-INST: bfmla za.h[w8, 5, vgx2], { z22.h, z23.h }, z14.h
// CHECK-ENCODING: [0xc5,0x1e,0x6e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c16e1ec5 <unknown>
bfmla za.h[w8, 5], {z22.h - z23.h}, z14.h // 11000001-01101110-00011110-11000101
// CHECK-INST: bfmla za.h[w8, 5, vgx2], { z22.h, z23.h }, z14.h
// CHECK-ENCODING: [0xc5,0x1e,0x6e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c16e1ec5 <unknown>
bfmla za.h[w11, 2, vgx2], {z9.h, z10.h}, z1.h // 11000001-01100001-01111101-00100010
// CHECK-INST: bfmla za.h[w11, 2, vgx2], { z9.h, z10.h }, z1.h
// CHECK-ENCODING: [0x22,0x7d,0x61,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1617d22 <unknown>
bfmla za.h[w11, 2], {z9.h - z10.h}, z1.h // 11000001-01100001-01111101-00100010
// CHECK-INST: bfmla za.h[w11, 2, vgx2], { z9.h, z10.h }, z1.h
// CHECK-ENCODING: [0x22,0x7d,0x61,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1617d22 <unknown>
bfmla za.h[w9, 7, vgx2], {z12.h, z13.h}, z11.h // 11000001-01101011-00111101-10000111
// CHECK-INST: bfmla za.h[w9, 7, vgx2], { z12.h, z13.h }, z11.h
// CHECK-ENCODING: [0x87,0x3d,0x6b,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c16b3d87 <unknown>
bfmla za.h[w9, 7], {z12.h - z13.h}, z11.h // 11000001-01101011-00111101-10000111
// CHECK-INST: bfmla za.h[w9, 7, vgx2], { z12.h, z13.h }, z11.h
// CHECK-ENCODING: [0x87,0x3d,0x6b,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c16b3d87 <unknown>
bfmla za.h[w8, 0, vgx2], {z0.h, z1.h}, z0.h[0] // 11000001-00010000-00010000-00100000
// CHECK-INST: bfmla za.h[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0]
// CHECK-ENCODING: [0x20,0x10,0x10,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1101020 <unknown>
bfmla za.h[w8, 0], {z0.h - z1.h}, z0.h[0] // 11000001-00010000-00010000-00100000
// CHECK-INST: bfmla za.h[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0]
// CHECK-ENCODING: [0x20,0x10,0x10,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1101020 <unknown>
bfmla za.h[w10, 5, vgx2], {z10.h, z11.h}, z5.h[2] // 11000001-00010101-01010101-01100101
// CHECK-INST: bfmla za.h[w10, 5, vgx2], { z10.h, z11.h }, z5.h[2]
// CHECK-ENCODING: [0x65,0x55,0x15,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1155565 <unknown>
bfmla za.h[w10, 5], {z10.h - z11.h}, z5.h[2] // 11000001-00010101-01010101-01100101
// CHECK-INST: bfmla za.h[w10, 5, vgx2], { z10.h, z11.h }, z5.h[2]
// CHECK-ENCODING: [0x65,0x55,0x15,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1155565 <unknown>
bfmla za.h[w11, 7, vgx2], {z12.h, z13.h}, z8.h[6] // 11000001-00011000-01111101-10100111
// CHECK-INST: bfmla za.h[w11, 7, vgx2], { z12.h, z13.h }, z8.h[6]
// CHECK-ENCODING: [0xa7,0x7d,0x18,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1187da7 <unknown>
bfmla za.h[w11, 7], {z12.h - z13.h}, z8.h[6] // 11000001-00011000-01111101-10100111
// CHECK-INST: bfmla za.h[w11, 7, vgx2], { z12.h, z13.h }, z8.h[6]
// CHECK-ENCODING: [0xa7,0x7d,0x18,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1187da7 <unknown>
bfmla za.h[w11, 7, vgx2], {z30.h, z31.h}, z15.h[7] // 11000001-00011111-01111111-11101111
// CHECK-INST: bfmla za.h[w11, 7, vgx2], { z30.h, z31.h }, z15.h[7]
// CHECK-ENCODING: [0xef,0x7f,0x1f,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11f7fef <unknown>
bfmla za.h[w11, 7], {z30.h - z31.h}, z15.h[7] // 11000001-00011111-01111111-11101111
// CHECK-INST: bfmla za.h[w11, 7, vgx2], { z30.h, z31.h }, z15.h[7]
// CHECK-ENCODING: [0xef,0x7f,0x1f,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11f7fef <unknown>
bfmla za.h[w8, 5, vgx2], {z16.h, z17.h}, z0.h[6] // 11000001-00010000-00011110-00100101
// CHECK-INST: bfmla za.h[w8, 5, vgx2], { z16.h, z17.h }, z0.h[6]
// CHECK-ENCODING: [0x25,0x1e,0x10,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1101e25 <unknown>
bfmla za.h[w8, 5], {z16.h - z17.h}, z0.h[6] // 11000001-00010000-00011110-00100101
// CHECK-INST: bfmla za.h[w8, 5, vgx2], { z16.h, z17.h }, z0.h[6]
// CHECK-ENCODING: [0x25,0x1e,0x10,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1101e25 <unknown>
bfmla za.h[w8, 1, vgx2], {z0.h, z1.h}, z14.h[2] // 11000001-00011110-00010100-00100001
// CHECK-INST: bfmla za.h[w8, 1, vgx2], { z0.h, z1.h }, z14.h[2]
// CHECK-ENCODING: [0x21,0x14,0x1e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11e1421 <unknown>
bfmla za.h[w8, 1], {z0.h - z1.h}, z14.h[2] // 11000001-00011110-00010100-00100001
// CHECK-INST: bfmla za.h[w8, 1, vgx2], { z0.h, z1.h }, z14.h[2]
// CHECK-ENCODING: [0x21,0x14,0x1e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11e1421 <unknown>
bfmla za.h[w10, 0, vgx2], {z18.h, z19.h}, z4.h[3] // 11000001-00010100-01010110-01101000
// CHECK-INST: bfmla za.h[w10, 0, vgx2], { z18.h, z19.h }, z4.h[3]
// CHECK-ENCODING: [0x68,0x56,0x14,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1145668 <unknown>
bfmla za.h[w10, 0], {z18.h - z19.h}, z4.h[3] // 11000001-00010100-01010110-01101000
// CHECK-INST: bfmla za.h[w10, 0, vgx2], { z18.h, z19.h }, z4.h[3]
// CHECK-ENCODING: [0x68,0x56,0x14,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1145668 <unknown>
bfmla za.h[w8, 0, vgx2], {z12.h, z13.h}, z2.h[4] // 11000001-00010010-00011001-10100000
// CHECK-INST: bfmla za.h[w8, 0, vgx2], { z12.h, z13.h }, z2.h[4]
// CHECK-ENCODING: [0xa0,0x19,0x12,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11219a0 <unknown>
bfmla za.h[w8, 0], {z12.h - z13.h}, z2.h[4] // 11000001-00010010-00011001-10100000
// CHECK-INST: bfmla za.h[w8, 0, vgx2], { z12.h, z13.h }, z2.h[4]
// CHECK-ENCODING: [0xa0,0x19,0x12,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11219a0 <unknown>
bfmla za.h[w10, 1, vgx2], {z0.h, z1.h}, z10.h[4] // 11000001-00011010-01011000-00100001
// CHECK-INST: bfmla za.h[w10, 1, vgx2], { z0.h, z1.h }, z10.h[4]
// CHECK-ENCODING: [0x21,0x58,0x1a,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11a5821 <unknown>
bfmla za.h[w10, 1], {z0.h - z1.h}, z10.h[4] // 11000001-00011010-01011000-00100001
// CHECK-INST: bfmla za.h[w10, 1, vgx2], { z0.h, z1.h }, z10.h[4]
// CHECK-ENCODING: [0x21,0x58,0x1a,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11a5821 <unknown>
bfmla za.h[w8, 5, vgx2], {z22.h, z23.h}, z14.h[5] // 11000001-00011110-00011010-11101101
// CHECK-INST: bfmla za.h[w8, 5, vgx2], { z22.h, z23.h }, z14.h[5]
// CHECK-ENCODING: [0xed,0x1a,0x1e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11e1aed <unknown>
bfmla za.h[w8, 5], {z22.h - z23.h}, z14.h[5] // 11000001-00011110-00011010-11101101
// CHECK-INST: bfmla za.h[w8, 5, vgx2], { z22.h, z23.h }, z14.h[5]
// CHECK-ENCODING: [0xed,0x1a,0x1e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11e1aed <unknown>
bfmla za.h[w11, 2, vgx2], {z8.h, z9.h}, z1.h[2] // 11000001-00010001-01110101-00100010
// CHECK-INST: bfmla za.h[w11, 2, vgx2], { z8.h, z9.h }, z1.h[2]
// CHECK-ENCODING: [0x22,0x75,0x11,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1117522 <unknown>
bfmla za.h[w11, 2], {z8.h - z9.h}, z1.h[2] // 11000001-00010001-01110101-00100010
// CHECK-INST: bfmla za.h[w11, 2, vgx2], { z8.h, z9.h }, z1.h[2]
// CHECK-ENCODING: [0x22,0x75,0x11,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1117522 <unknown>
bfmla za.h[w9, 7, vgx2], {z12.h, z13.h}, z11.h[4] // 11000001-00011011-00111001-10100111
// CHECK-INST: bfmla za.h[w9, 7, vgx2], { z12.h, z13.h }, z11.h[4]
// CHECK-ENCODING: [0xa7,0x39,0x1b,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11b39a7 <unknown>
bfmla za.h[w9, 7], {z12.h - z13.h}, z11.h[4] // 11000001-00011011-00111001-10100111
// CHECK-INST: bfmla za.h[w9, 7, vgx2], { z12.h, z13.h }, z11.h[4]
// CHECK-ENCODING: [0xa7,0x39,0x1b,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11b39a7 <unknown>
bfmla za.h[w8, 0, vgx2], {z0.h, z1.h}, {z0.h, z1.h} // 11000001, 11100000-00010000-00001000
// CHECK-INST: bfmla za.h[w8, 0, vgx2], { z0.h, z1.h }, { z0.h, z1.h }
// CHECK-ENCODING: [0x08,0x10,0xe0,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e01008 <unknown>
bfmla za.h[w8, 0], {z0.h - z1.h}, {z0.h - z1.h} // 11000001-11100000-00010000-00001000
// CHECK-INST: bfmla za.h[w8, 0, vgx2], { z0.h, z1.h }, { z0.h, z1.h }
// CHECK-ENCODING: [0x08,0x10,0xe0,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e01008 <unknown>
bfmla za.h[w10, 5, vgx2], {z10.h, z11.h}, {z20.h, z21.h} // 11000001, 11110100-01010001-01001101
// CHECK-INST: bfmla za.h[w10, 5, vgx2], { z10.h, z11.h }, { z20.h, z21.h }
// CHECK-ENCODING: [0x4d,0x51,0xf4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f4514d <unknown>
bfmla za.h[w10, 5], {z10.h - z11.h}, {z20.h - z21.h} // 11000001-11110100-01010001-01001101
// CHECK-INST: bfmla za.h[w10, 5, vgx2], { z10.h, z11.h }, { z20.h, z21.h }
// CHECK-ENCODING: [0x4d,0x51,0xf4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f4514d <unknown>
bfmla za.h[w11, 7, vgx2], {z12.h, z13.h}, {z8.h, z9.h} // 11000001, 11101000-01110001-10001111
// CHECK-INST: bfmla za.h[w11, 7, vgx2], { z12.h, z13.h }, { z8.h, z9.h }
// CHECK-ENCODING: [0x8f,0x71,0xe8,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e8718f <unknown>
bfmla za.h[w11, 7], {z12.h - z13.h}, {z8.h - z9.h} // 11000001-11101000-01110001-10001111
// CHECK-INST: bfmla za.h[w11, 7, vgx2], { z12.h, z13.h }, { z8.h, z9.h }
// CHECK-ENCODING: [0x8f,0x71,0xe8,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e8718f <unknown>
bfmla za.h[w11, 7, vgx2], {z30.h, z31.h}, {z30.h, z31.h} // 11000001, 11111110-01110011-11001111
// CHECK-INST: bfmla za.h[w11, 7, vgx2], { z30.h, z31.h }, { z30.h, z31.h }
// CHECK-ENCODING: [0xcf,0x73,0xfe,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fe73cf <unknown>
bfmla za.h[w11, 7], {z30.h - z31.h}, {z30.h - z31.h} // 11000001-11111110-01110011-11001111
// CHECK-INST: bfmla za.h[w11, 7, vgx2], { z30.h, z31.h }, { z30.h, z31.h }
// CHECK-ENCODING: [0xcf,0x73,0xfe,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fe73cf <unknown>
bfmla za.h[w8, 5, vgx2], {z16.h, z17.h}, {z16.h, z17.h} // 11000001, 11110000-00010010-00001101
// CHECK-INST: bfmla za.h[w8, 5, vgx2], { z16.h, z17.h }, { z16.h, z17.h }
// CHECK-ENCODING: [0x0d,0x12,0xf0,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f0120d <unknown>
bfmla za.h[w8, 5], {z16.h - z17.h}, {z16.h - z17.h} // 11000001-11110000-00010010-00001101
// CHECK-INST: bfmla za.h[w8, 5, vgx2], { z16.h, z17.h }, { z16.h, z17.h }
// CHECK-ENCODING: [0x0d,0x12,0xf0,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f0120d <unknown>
bfmla za.h[w8, 1, vgx2], {z0.h, z1.h}, {z30.h, z31.h} // 11000001, 11111110-00010000-00001001
// CHECK-INST: bfmla za.h[w8, 1, vgx2], { z0.h, z1.h }, { z30.h, z31.h }
// CHECK-ENCODING: [0x09,0x10,0xfe,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fe1009 <unknown>
bfmla za.h[w8, 1], {z0.h - z1.h}, {z30.h - z31.h} // 11000001-11111110-00010000-00001001
// CHECK-INST: bfmla za.h[w8, 1, vgx2], { z0.h, z1.h }, { z30.h, z31.h }
// CHECK-ENCODING: [0x09,0x10,0xfe,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fe1009 <unknown>
bfmla za.h[w10, 0, vgx2], {z18.h, z19.h}, {z20.h, z21.h} // 11000001, 11110100-01010010-01001000
// CHECK-INST: bfmla za.h[w10, 0, vgx2], { z18.h, z19.h }, { z20.h, z21.h }
// CHECK-ENCODING: [0x48,0x52,0xf4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f45248 <unknown>
bfmla za.h[w10, 0], {z18.h - z19.h}, {z20.h - z21.h} // 11000001-11110100-01010010-01001000
// CHECK-INST: bfmla za.h[w10, 0, vgx2], { z18.h, z19.h }, { z20.h, z21.h }
// CHECK-ENCODING: [0x48,0x52,0xf4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f45248 <unknown>
bfmla za.h[w8, 0, vgx2], {z12.h, z13.h}, {z2.h, z3.h} // 11000001, 11100010-00010001-10001000
// CHECK-INST: bfmla za.h[w8, 0, vgx2], { z12.h, z13.h }, { z2.h, z3.h }
// CHECK-ENCODING: [0x88,0x11,0xe2,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e21188 <unknown>
bfmla za.h[w8, 0], {z12.h - z13.h}, {z2.h - z3.h} // 11000001-11100010-00010001-10001000
// CHECK-INST: bfmla za.h[w8, 0, vgx2], { z12.h, z13.h }, { z2.h, z3.h }
// CHECK-ENCODING: [0x88,0x11,0xe2,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e21188 <unknown>
bfmla za.h[w10, 1, vgx2], {z0.h, z1.h}, {z26.h, z27.h} // 11000001, 11111010-01010000-00001001
// CHECK-INST: bfmla za.h[w10, 1, vgx2], { z0.h, z1.h }, { z26.h, z27.h }
// CHECK-ENCODING: [0x09,0x50,0xfa,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fa5009 <unknown>
bfmla za.h[w10, 1], {z0.h - z1.h}, {z26.h - z27.h} // 11000001-11111010-01010000-00001001
// CHECK-INST: bfmla za.h[w10, 1, vgx2], { z0.h, z1.h }, { z26.h, z27.h }
// CHECK-ENCODING: [0x09,0x50,0xfa,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fa5009 <unknown>
bfmla za.h[w8, 5, vgx2], {z22.h, z23.h}, {z30.h, z31.h} // 11000001, 11111110-00010010-11001101
// CHECK-INST: bfmla za.h[w8, 5, vgx2], { z22.h, z23.h }, { z30.h, z31.h }
// CHECK-ENCODING: [0xcd,0x12,0xfe,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fe12cd <unknown>
bfmla za.h[w8, 5], {z22.h - z23.h}, {z30.h - z31.h} // 11000001-11111110-00010010-11001101
// CHECK-INST: bfmla za.h[w8, 5, vgx2], { z22.h, z23.h }, { z30.h, z31.h }
// CHECK-ENCODING: [0xcd,0x12,0xfe,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fe12cd <unknown>
bfmla za.h[w11, 2, vgx2], {z8.h, z9.h}, {z0.h, z1.h} // 11000001, 11100000-01110001-00001010
// CHECK-INST: bfmla za.h[w11, 2, vgx2], { z8.h, z9.h }, { z0.h, z1.h }
// CHECK-ENCODING: [0x0a,0x71,0xe0,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e0710a <unknown>
bfmla za.h[w11, 2], {z8.h - z9.h}, {z0.h - z1.h} // 11000001-11100000-01110001-00001010
// CHECK-INST: bfmla za.h[w11, 2, vgx2], { z8.h, z9.h }, { z0.h, z1.h }
// CHECK-ENCODING: [0x0a,0x71,0xe0,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e0710a <unknown>
bfmla za.h[w9, 7, vgx2], {z12.h, z13.h}, {z10.h, z11.h} // 11000001, 11101010-00110001-10001111
// CHECK-INST: bfmla za.h[w9, 7, vgx2], { z12.h, z13.h }, { z10.h, z11.h }
// CHECK-ENCODING: [0x8f,0x31,0xea,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1ea318f <unknown>
bfmla za.h[w9, 7], {z12.h - z13.h}, {z10.h - z11.h} // 11000001-11101010-00110001-10001111
// CHECK-INST: bfmla za.h[w9, 7, vgx2], { z12.h, z13.h }, { z10.h, z11.h }
// CHECK-ENCODING: [0x8f,0x31,0xea,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1ea318f <unknown>
bfmla za.h[w8, 0, vgx4], {z0.h - z3.h}, z0.h // 11000001-01110000-00011100-00000000
// CHECK-INST: bfmla za.h[w8, 0, vgx4], { z0.h - z3.h }, z0.h
// CHECK-ENCODING: [0x00,0x1c,0x70,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1701c00 <unknown>
bfmla za.h[w8, 0], {z0.h - z3.h}, z0.h // 11000001-01110000-00011100-00000000
// CHECK-INST: bfmla za.h[w8, 0, vgx4], { z0.h - z3.h }, z0.h
// CHECK-ENCODING: [0x00,0x1c,0x70,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1701c00 <unknown>
bfmla za.h[w10, 5, vgx4], {z10.h - z13.h}, z5.h // 11000001-01110101-01011101-01000101
// CHECK-INST: bfmla za.h[w10, 5, vgx4], { z10.h - z13.h }, z5.h
// CHECK-ENCODING: [0x45,0x5d,0x75,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1755d45 <unknown>
bfmla za.h[w10, 5], {z10.h - z13.h}, z5.h // 11000001-01110101-01011101-01000101
// CHECK-INST: bfmla za.h[w10, 5, vgx4], { z10.h - z13.h }, z5.h
// CHECK-ENCODING: [0x45,0x5d,0x75,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1755d45 <unknown>
bfmla za.h[w11, 7, vgx4], {z13.h - z16.h}, z8.h // 11000001-01111000-01111101-10100111
// CHECK-INST: bfmla za.h[w11, 7, vgx4], { z13.h - z16.h }, z8.h
// CHECK-ENCODING: [0xa7,0x7d,0x78,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1787da7 <unknown>
bfmla za.h[w11, 7], {z13.h - z16.h}, z8.h // 11000001-01111000-01111101-10100111
// CHECK-INST: bfmla za.h[w11, 7, vgx4], { z13.h - z16.h }, z8.h
// CHECK-ENCODING: [0xa7,0x7d,0x78,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1787da7 <unknown>
bfmla za.h[w11, 7, vgx4], {z31.h, z0.h, z1.h, z2.h}, z15.h // 11000001-01111111-01111111-11100111
// CHECK-INST: bfmla za.h[w11, 7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h
// CHECK-ENCODING: [0xe7,0x7f,0x7f,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c17f7fe7 <unknown>
bfmla za.h[w11, 7], {z31.h, z0.h, z1.h, z2.h}, z15.h // 11000001-01111111-01111111-11100111
// CHECK-INST: bfmla za.h[w11, 7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h
// CHECK-ENCODING: [0xe7,0x7f,0x7f,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c17f7fe7 <unknown>
bfmla za.h[w8, 5, vgx4], {z17.h - z20.h}, z0.h // 11000001-01110000-00011110-00100101
// CHECK-INST: bfmla za.h[w8, 5, vgx4], { z17.h - z20.h }, z0.h
// CHECK-ENCODING: [0x25,0x1e,0x70,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1701e25 <unknown>
bfmla za.h[w8, 5], {z17.h - z20.h}, z0.h // 11000001-01110000-00011110-00100101
// CHECK-INST: bfmla za.h[w8, 5, vgx4], { z17.h - z20.h }, z0.h
// CHECK-ENCODING: [0x25,0x1e,0x70,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1701e25 <unknown>
bfmla za.h[w8, 1, vgx4], {z1.h - z4.h}, z14.h // 11000001-01111110-00011100-00100001
// CHECK-INST: bfmla za.h[w8, 1, vgx4], { z1.h - z4.h }, z14.h
// CHECK-ENCODING: [0x21,0x1c,0x7e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c17e1c21 <unknown>
bfmla za.h[w8, 1], {z1.h - z4.h}, z14.h // 11000001-01111110-00011100-00100001
// CHECK-INST: bfmla za.h[w8, 1, vgx4], { z1.h - z4.h }, z14.h
// CHECK-ENCODING: [0x21,0x1c,0x7e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c17e1c21 <unknown>
bfmla za.h[w10, 0, vgx4], {z19.h - z22.h}, z4.h // 11000001-01110100-01011110-01100000
// CHECK-INST: bfmla za.h[w10, 0, vgx4], { z19.h - z22.h }, z4.h
// CHECK-ENCODING: [0x60,0x5e,0x74,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1745e60 <unknown>
bfmla za.h[w10, 0], {z19.h - z22.h}, z4.h // 11000001-01110100-01011110-01100000
// CHECK-INST: bfmla za.h[w10, 0, vgx4], { z19.h - z22.h }, z4.h
// CHECK-ENCODING: [0x60,0x5e,0x74,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1745e60 <unknown>
bfmla za.h[w8, 0, vgx4], {z12.h - z15.h}, z2.h // 11000001-01110010-00011101-10000000
// CHECK-INST: bfmla za.h[w8, 0, vgx4], { z12.h - z15.h }, z2.h
// CHECK-ENCODING: [0x80,0x1d,0x72,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1721d80 <unknown>
bfmla za.h[w8, 0], {z12.h - z15.h}, z2.h // 11000001-01110010-00011101-10000000
// CHECK-INST: bfmla za.h[w8, 0, vgx4], { z12.h - z15.h }, z2.h
// CHECK-ENCODING: [0x80,0x1d,0x72,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1721d80 <unknown>
bfmla za.h[w10, 1, vgx4], {z1.h - z4.h}, z10.h // 11000001-01111010-01011100-00100001
// CHECK-INST: bfmla za.h[w10, 1, vgx4], { z1.h - z4.h }, z10.h
// CHECK-ENCODING: [0x21,0x5c,0x7a,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c17a5c21 <unknown>
bfmla za.h[w10, 1], {z1.h - z4.h}, z10.h // 11000001-01111010-01011100-00100001
// CHECK-INST: bfmla za.h[w10, 1, vgx4], { z1.h - z4.h }, z10.h
// CHECK-ENCODING: [0x21,0x5c,0x7a,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c17a5c21 <unknown>
bfmla za.h[w8, 5, vgx4], {z22.h - z25.h}, z14.h // 11000001-01111110-00011110-11000101
// CHECK-INST: bfmla za.h[w8, 5, vgx4], { z22.h - z25.h }, z14.h
// CHECK-ENCODING: [0xc5,0x1e,0x7e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c17e1ec5 <unknown>
bfmla za.h[w8, 5], {z22.h - z25.h}, z14.h // 11000001-01111110-00011110-11000101
// CHECK-INST: bfmla za.h[w8, 5, vgx4], { z22.h - z25.h }, z14.h
// CHECK-ENCODING: [0xc5,0x1e,0x7e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c17e1ec5 <unknown>
bfmla za.h[w11, 2, vgx4], {z9.h - z12.h}, z1.h // 11000001-01110001-01111101-00100010
// CHECK-INST: bfmla za.h[w11, 2, vgx4], { z9.h - z12.h }, z1.h
// CHECK-ENCODING: [0x22,0x7d,0x71,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1717d22 <unknown>
bfmla za.h[w11, 2], {z9.h - z12.h}, z1.h // 11000001-01110001-01111101-00100010
// CHECK-INST: bfmla za.h[w11, 2, vgx4], { z9.h - z12.h }, z1.h
// CHECK-ENCODING: [0x22,0x7d,0x71,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1717d22 <unknown>
bfmla za.h[w9, 7, vgx4], {z12.h - z15.h}, z11.h // 11000001-01111011-00111101-10000111
// CHECK-INST: bfmla za.h[w9, 7, vgx4], { z12.h - z15.h }, z11.h
// CHECK-ENCODING: [0x87,0x3d,0x7b,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c17b3d87 <unknown>
bfmla za.h[w9, 7], {z12.h - z15.h}, z11.h // 11000001-01111011-00111101-10000111
// CHECK-INST: bfmla za.h[w9, 7, vgx4], { z12.h - z15.h }, z11.h
// CHECK-ENCODING: [0x87,0x3d,0x7b,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c17b3d87 <unknown>
bfmla za.h[w8, 0, vgx4], {z0.h - z3.h}, z0.h[0] // 11000001-00010000-10010000-00100000
// CHECK-INST: bfmla za.h[w8, 0, vgx4], { z0.h - z3.h }, z0.h[0]
// CHECK-ENCODING: [0x20,0x90,0x10,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1109020 <unknown>
bfmla za.h[w8, 0], {z0.h - z3.h}, z0.h[0] // 11000001-00010000-10010000-00100000
// CHECK-INST: bfmla za.h[w8, 0, vgx4], { z0.h - z3.h }, z0.h[0]
// CHECK-ENCODING: [0x20,0x90,0x10,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1109020 <unknown>
bfmla za.h[w10, 5, vgx4], {z8.h - z11.h}, z5.h[2] // 11000001-00010101-11010101-00100101
// CHECK-INST: bfmla za.h[w10, 5, vgx4], { z8.h - z11.h }, z5.h[2]
// CHECK-ENCODING: [0x25,0xd5,0x15,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c115d525 <unknown>
bfmla za.h[w10, 5], {z8.h - z11.h}, z5.h[2] // 11000001-00010101-11010101-00100101
// CHECK-INST: bfmla za.h[w10, 5, vgx4], { z8.h - z11.h }, z5.h[2]
// CHECK-ENCODING: [0x25,0xd5,0x15,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c115d525 <unknown>
bfmla za.h[w11, 7, vgx4], {z12.h - z15.h}, z8.h[6] // 11000001-00011000-11111101-10100111
// CHECK-INST: bfmla za.h[w11, 7, vgx4], { z12.h - z15.h }, z8.h[6]
// CHECK-ENCODING: [0xa7,0xfd,0x18,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c118fda7 <unknown>
bfmla za.h[w11, 7], {z12.h - z15.h}, z8.h[6] // 11000001-00011000-11111101-10100111
// CHECK-INST: bfmla za.h[w11, 7, vgx4], { z12.h - z15.h }, z8.h[6]
// CHECK-ENCODING: [0xa7,0xfd,0x18,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c118fda7 <unknown>
bfmla za.h[w11, 7, vgx4], {z28.h - z31.h}, z15.h[7] // 11000001-00011111-11111111-10101111
// CHECK-INST: bfmla za.h[w11, 7, vgx4], { z28.h - z31.h }, z15.h[7]
// CHECK-ENCODING: [0xaf,0xff,0x1f,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11fffaf <unknown>
bfmla za.h[w11, 7], {z28.h - z31.h}, z15.h[7] // 11000001-00011111-11111111-10101111
// CHECK-INST: bfmla za.h[w11, 7, vgx4], { z28.h - z31.h }, z15.h[7]
// CHECK-ENCODING: [0xaf,0xff,0x1f,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11fffaf <unknown>
bfmla za.h[w8, 5, vgx4], {z16.h - z19.h}, z0.h[6] // 11000001-00010000-10011110-00100101
// CHECK-INST: bfmla za.h[w8, 5, vgx4], { z16.h - z19.h }, z0.h[6]
// CHECK-ENCODING: [0x25,0x9e,0x10,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1109e25 <unknown>
bfmla za.h[w8, 5], {z16.h - z19.h}, z0.h[6] // 11000001-00010000-10011110-00100101
// CHECK-INST: bfmla za.h[w8, 5, vgx4], { z16.h - z19.h }, z0.h[6]
// CHECK-ENCODING: [0x25,0x9e,0x10,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1109e25 <unknown>
bfmla za.h[w8, 1, vgx4], {z0.h - z3.h}, z14.h[2] // 11000001-00011110-10010100-00100001
// CHECK-INST: bfmla za.h[w8, 1, vgx4], { z0.h - z3.h }, z14.h[2]
// CHECK-ENCODING: [0x21,0x94,0x1e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11e9421 <unknown>
bfmla za.h[w8, 1], {z0.h - z3.h}, z14.h[2] // 11000001-00011110-10010100-00100001
// CHECK-INST: bfmla za.h[w8, 1, vgx4], { z0.h - z3.h }, z14.h[2]
// CHECK-ENCODING: [0x21,0x94,0x1e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11e9421 <unknown>
bfmla za.h[w10, 0, vgx4], {z16.h - z19.h}, z4.h[3] // 11000001-00010100-11010110-00101000
// CHECK-INST: bfmla za.h[w10, 0, vgx4], { z16.h - z19.h }, z4.h[3]
// CHECK-ENCODING: [0x28,0xd6,0x14,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c114d628 <unknown>
bfmla za.h[w10, 0], {z16.h - z19.h}, z4.h[3] // 11000001-00010100-11010110-00101000
// CHECK-INST: bfmla za.h[w10, 0, vgx4], { z16.h - z19.h }, z4.h[3]
// CHECK-ENCODING: [0x28,0xd6,0x14,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c114d628 <unknown>
bfmla za.h[w8, 0, vgx4], {z12.h - z15.h}, z2.h[4] // 11000001-00010010-10011001-10100000
// CHECK-INST: bfmla za.h[w8, 0, vgx4], { z12.h - z15.h }, z2.h[4]
// CHECK-ENCODING: [0xa0,0x99,0x12,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11299a0 <unknown>
bfmla za.h[w8, 0], {z12.h - z15.h}, z2.h[4] // 11000001-00010010-10011001-10100000
// CHECK-INST: bfmla za.h[w8, 0, vgx4], { z12.h - z15.h }, z2.h[4]
// CHECK-ENCODING: [0xa0,0x99,0x12,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11299a0 <unknown>
bfmla za.h[w10, 1, vgx4], {z0.h - z3.h}, z10.h[4] // 11000001-00011010-11011000-00100001
// CHECK-INST: bfmla za.h[w10, 1, vgx4], { z0.h - z3.h }, z10.h[4]
// CHECK-ENCODING: [0x21,0xd8,0x1a,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11ad821 <unknown>
bfmla za.h[w10, 1], {z0.h - z3.h}, z10.h[4] // 11000001-00011010-11011000-00100001
// CHECK-INST: bfmla za.h[w10, 1, vgx4], { z0.h - z3.h }, z10.h[4]
// CHECK-ENCODING: [0x21,0xd8,0x1a,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11ad821 <unknown>
bfmla za.h[w8, 5, vgx4], {z20.h - z23.h}, z14.h[5] // 11000001-00011110-10011010-10101101
// CHECK-INST: bfmla za.h[w8, 5, vgx4], { z20.h - z23.h }, z14.h[5]
// CHECK-ENCODING: [0xad,0x9a,0x1e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11e9aad <unknown>
bfmla za.h[w8, 5], {z20.h - z23.h}, z14.h[5] // 11000001-00011110-10011010-10101101
// CHECK-INST: bfmla za.h[w8, 5, vgx4], { z20.h - z23.h }, z14.h[5]
// CHECK-ENCODING: [0xad,0x9a,0x1e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11e9aad <unknown>
bfmla za.h[w11, 2, vgx4], {z8.h - z11.h}, z1.h[2] // 11000001-00010001-11110101-00100010
// CHECK-INST: bfmla za.h[w11, 2, vgx4], { z8.h - z11.h }, z1.h[2]
// CHECK-ENCODING: [0x22,0xf5,0x11,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c111f522 <unknown>
bfmla za.h[w11, 2], {z8.h - z11.h}, z1.h[2] // 11000001-00010001-11110101-00100010
// CHECK-INST: bfmla za.h[w11, 2, vgx4], { z8.h - z11.h }, z1.h[2]
// CHECK-ENCODING: [0x22,0xf5,0x11,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c111f522 <unknown>
bfmla za.h[w9, 7, vgx4], {z12.h - z15.h}, z11.h[4] // 11000001-00011011-10111001-10100111
// CHECK-INST: bfmla za.h[w9, 7, vgx4], { z12.h - z15.h }, z11.h[4]
// CHECK-ENCODING: [0xa7,0xb9,0x1b,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11bb9a7 <unknown>
bfmla za.h[w9, 7], {z12.h - z15.h}, z11.h[4] // 11000001-00011011-10111001-10100111
// CHECK-INST: bfmla za.h[w9, 7, vgx4], { z12.h - z15.h }, z11.h[4]
// CHECK-ENCODING: [0xa7,0xb9,0x1b,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11bb9a7 <unknown>
bfmla za.h[w8, 0, vgx4], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00010000-00001000
// CHECK-INST: bfmla za.h[w8, 0, vgx4], { z0.h - z3.h }, { z0.h - z3.h }
// CHECK-ENCODING: [0x08,0x10,0xe1,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e11008 <unknown>
bfmla za.h[w8, 0], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00010000-00001000
// CHECK-INST: bfmla za.h[w8, 0, vgx4], { z0.h - z3.h }, { z0.h - z3.h }
// CHECK-ENCODING: [0x08,0x10,0xe1,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e11008 <unknown>
bfmla za.h[w10, 5, vgx4], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01010001-00001101
// CHECK-INST: bfmla za.h[w10, 5, vgx4], { z8.h - z11.h }, { z20.h - z23.h }
// CHECK-ENCODING: [0x0d,0x51,0xf5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f5510d <unknown>
bfmla za.h[w10, 5], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01010001-00001101
// CHECK-INST: bfmla za.h[w10, 5, vgx4], { z8.h - z11.h }, { z20.h - z23.h }
// CHECK-ENCODING: [0x0d,0x51,0xf5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f5510d <unknown>
bfmla za.h[w11, 7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01110001-10001111
// CHECK-INST: bfmla za.h[w11, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h }
// CHECK-ENCODING: [0x8f,0x71,0xe9,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e9718f <unknown>
bfmla za.h[w11, 7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01110001-10001111
// CHECK-INST: bfmla za.h[w11, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h }
// CHECK-ENCODING: [0x8f,0x71,0xe9,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e9718f <unknown>
bfmla za.h[w11, 7, vgx4], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01110011-10001111
// CHECK-INST: bfmla za.h[w11, 7, vgx4], { z28.h - z31.h }, { z28.h - z31.h }
// CHECK-ENCODING: [0x8f,0x73,0xfd,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fd738f <unknown>
bfmla za.h[w11, 7], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01110011-10001111
// CHECK-INST: bfmla za.h[w11, 7, vgx4], { z28.h - z31.h }, { z28.h - z31.h }
// CHECK-ENCODING: [0x8f,0x73,0xfd,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fd738f <unknown>
bfmla za.h[w8, 5, vgx4], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00010010-00001101
// CHECK-INST: bfmla za.h[w8, 5, vgx4], { z16.h - z19.h }, { z16.h - z19.h }
// CHECK-ENCODING: [0x0d,0x12,0xf1,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f1120d <unknown>
bfmla za.h[w8, 5], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00010010-00001101
// CHECK-INST: bfmla za.h[w8, 5, vgx4], { z16.h - z19.h }, { z16.h - z19.h }
// CHECK-ENCODING: [0x0d,0x12,0xf1,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f1120d <unknown>
bfmla za.h[w8, 1, vgx4], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00010000-00001001
// CHECK-INST: bfmla za.h[w8, 1, vgx4], { z0.h - z3.h }, { z28.h - z31.h }
// CHECK-ENCODING: [0x09,0x10,0xfd,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fd1009 <unknown>
bfmla za.h[w8, 1], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00010000-00001001
// CHECK-INST: bfmla za.h[w8, 1, vgx4], { z0.h - z3.h }, { z28.h - z31.h }
// CHECK-ENCODING: [0x09,0x10,0xfd,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fd1009 <unknown>
bfmla za.h[w10, 0, vgx4], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01010010-00001000
// CHECK-INST: bfmla za.h[w10, 0, vgx4], { z16.h - z19.h }, { z20.h - z23.h }
// CHECK-ENCODING: [0x08,0x52,0xf5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f55208 <unknown>
bfmla za.h[w10, 0], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01010010-00001000
// CHECK-INST: bfmla za.h[w10, 0, vgx4], { z16.h - z19.h }, { z20.h - z23.h }
// CHECK-ENCODING: [0x08,0x52,0xf5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f55208 <unknown>
bfmla za.h[w8, 0, vgx4], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00010001-10001000
// CHECK-INST: bfmla za.h[w8, 0, vgx4], { z12.h - z15.h }, { z0.h - z3.h }
// CHECK-ENCODING: [0x88,0x11,0xe1,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e11188 <unknown>
bfmla za.h[w8, 0], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00010001-10001000
// CHECK-INST: bfmla za.h[w8, 0, vgx4], { z12.h - z15.h }, { z0.h - z3.h }
// CHECK-ENCODING: [0x88,0x11,0xe1,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e11188 <unknown>
bfmla za.h[w10, 1, vgx4], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01010000-00001001
// CHECK-INST: bfmla za.h[w10, 1, vgx4], { z0.h - z3.h }, { z24.h - z27.h }
// CHECK-ENCODING: [0x09,0x50,0xf9,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f95009 <unknown>
bfmla za.h[w10, 1], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01010000-00001001
// CHECK-INST: bfmla za.h[w10, 1, vgx4], { z0.h - z3.h }, { z24.h - z27.h }
// CHECK-ENCODING: [0x09,0x50,0xf9,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f95009 <unknown>
bfmla za.h[w8, 5, vgx4], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00010010-10001101
// CHECK-INST: bfmla za.h[w8, 5, vgx4], { z20.h - z23.h }, { z28.h - z31.h }
// CHECK-ENCODING: [0x8d,0x12,0xfd,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fd128d <unknown>
bfmla za.h[w8, 5], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00010010-10001101
// CHECK-INST: bfmla za.h[w8, 5, vgx4], { z20.h - z23.h }, { z28.h - z31.h }
// CHECK-ENCODING: [0x8d,0x12,0xfd,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fd128d <unknown>
bfmla za.h[w11, 2, vgx4], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01110001-00001010
// CHECK-INST: bfmla za.h[w11, 2, vgx4], { z8.h - z11.h }, { z0.h - z3.h }
// CHECK-ENCODING: [0x0a,0x71,0xe1,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e1710a <unknown>
bfmla za.h[w11, 2], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01110001-00001010
// CHECK-INST: bfmla za.h[w11, 2, vgx4], { z8.h - z11.h }, { z0.h - z3.h }
// CHECK-ENCODING: [0x0a,0x71,0xe1,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e1710a <unknown>
bfmla za.h[w9, 7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00110001-10001111
// CHECK-INST: bfmla za.h[w9, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h }
// CHECK-ENCODING: [0x8f,0x31,0xe9,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e9318f <unknown>
bfmla za.h[w9, 7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00110001-10001111
// CHECK-INST: bfmla za.h[w9, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h }
// CHECK-ENCODING: [0x8f,0x31,0xe9,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e9318f <unknown>
diff --git a/llvm/test/MC/AArch64/SME2p1/bfmls-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfmls-diagnostics.s
index 4174e24..d6e7713 100644
--- a/llvm/test/MC/AArch64/SME2p1/bfmls-diagnostics.s
+++ b/llvm/test/MC/AArch64/SME2/bfmls-diagnostics.s
@@ -1,4 +1,4 @@
-// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 2>&1 < %s | FileCheck %s
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 2>&1 < %s | FileCheck %s
// --------------------------------------------------------------------------//
// Invalid vector list
diff --git a/llvm/test/MC/AArch64/SME2p1/bfmls.s b/llvm/test/MC/AArch64/SME2/bfmls.s
index 631da1e..8d5bdc4 100644
--- a/llvm/test/MC/AArch64/SME2p1/bfmls.s
+++ b/llvm/test/MC/AArch64/SME2/bfmls.s
@@ -1,876 +1,876 @@
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \
-// RUN: | llvm-objdump -d --mattr=+sme2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \
-// RUN: | llvm-objdump -d --mattr=-sme2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN: | llvm-objdump -d --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
-// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p1,+b16b16 -disassemble -show-encoding \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
bfmls za.h[w8, 0, vgx2], {z0.h, z1.h}, z0.h // 11000001-01100000-00011100-00001000
// CHECK-INST: bfmls za.h[w8, 0, vgx2], { z0.h, z1.h }, z0.h
// CHECK-ENCODING: [0x08,0x1c,0x60,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1601c08 <unknown>
bfmls za.h[w8, 0], {z0.h - z1.h}, z0.h // 11000001-01100000-00011100-00001000
// CHECK-INST: bfmls za.h[w8, 0, vgx2], { z0.h, z1.h }, z0.h
// CHECK-ENCODING: [0x08,0x1c,0x60,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1601c08 <unknown>
bfmls za.h[w10, 5, vgx2], {z10.h, z11.h}, z5.h // 11000001-01100101-01011101-01001101
// CHECK-INST: bfmls za.h[w10, 5, vgx2], { z10.h, z11.h }, z5.h
// CHECK-ENCODING: [0x4d,0x5d,0x65,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1655d4d <unknown>
bfmls za.h[w10, 5], {z10.h - z11.h}, z5.h // 11000001-01100101-01011101-01001101
// CHECK-INST: bfmls za.h[w10, 5, vgx2], { z10.h, z11.h }, z5.h
// CHECK-ENCODING: [0x4d,0x5d,0x65,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1655d4d <unknown>
bfmls za.h[w11, 7, vgx2], {z13.h, z14.h}, z8.h // 11000001-01101000-01111101-10101111
// CHECK-INST: bfmls za.h[w11, 7, vgx2], { z13.h, z14.h }, z8.h
// CHECK-ENCODING: [0xaf,0x7d,0x68,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1687daf <unknown>
bfmls za.h[w11, 7], {z13.h - z14.h}, z8.h // 11000001-01101000-01111101-10101111
// CHECK-INST: bfmls za.h[w11, 7, vgx2], { z13.h, z14.h }, z8.h
// CHECK-ENCODING: [0xaf,0x7d,0x68,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1687daf <unknown>
bfmls za.h[w11, 7, vgx2], {z31.h, z0.h}, z15.h // 11000001-01101111-01111111-11101111
// CHECK-INST: bfmls za.h[w11, 7, vgx2], { z31.h, z0.h }, z15.h
// CHECK-ENCODING: [0xef,0x7f,0x6f,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c16f7fef <unknown>
bfmls za.h[w11, 7], {z31.h - z0.h}, z15.h // 11000001-01101111-01111111-11101111
// CHECK-INST: bfmls za.h[w11, 7, vgx2], { z31.h, z0.h }, z15.h
// CHECK-ENCODING: [0xef,0x7f,0x6f,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c16f7fef <unknown>
bfmls za.h[w8, 5, vgx2], {z17.h, z18.h}, z0.h // 11000001-01100000-00011110-00101101
// CHECK-INST: bfmls za.h[w8, 5, vgx2], { z17.h, z18.h }, z0.h
// CHECK-ENCODING: [0x2d,0x1e,0x60,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1601e2d <unknown>
bfmls za.h[w8, 5], {z17.h - z18.h}, z0.h // 11000001-01100000-00011110-00101101
// CHECK-INST: bfmls za.h[w8, 5, vgx2], { z17.h, z18.h }, z0.h
// CHECK-ENCODING: [0x2d,0x1e,0x60,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1601e2d <unknown>
bfmls za.h[w8, 1, vgx2], {z1.h, z2.h}, z14.h // 11000001-01101110-00011100-00101001
// CHECK-INST: bfmls za.h[w8, 1, vgx2], { z1.h, z2.h }, z14.h
// CHECK-ENCODING: [0x29,0x1c,0x6e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c16e1c29 <unknown>
bfmls za.h[w8, 1], {z1.h - z2.h}, z14.h // 11000001-01101110-00011100-00101001
// CHECK-INST: bfmls za.h[w8, 1, vgx2], { z1.h, z2.h }, z14.h
// CHECK-ENCODING: [0x29,0x1c,0x6e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c16e1c29 <unknown>
bfmls za.h[w10, 0, vgx2], {z19.h, z20.h}, z4.h // 11000001-01100100-01011110-01101000
// CHECK-INST: bfmls za.h[w10, 0, vgx2], { z19.h, z20.h }, z4.h
// CHECK-ENCODING: [0x68,0x5e,0x64,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1645e68 <unknown>
bfmls za.h[w10, 0], {z19.h - z20.h}, z4.h // 11000001-01100100-01011110-01101000
// CHECK-INST: bfmls za.h[w10, 0, vgx2], { z19.h, z20.h }, z4.h
// CHECK-ENCODING: [0x68,0x5e,0x64,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1645e68 <unknown>
bfmls za.h[w8, 0, vgx2], {z12.h, z13.h}, z2.h // 11000001-01100010-00011101-10001000
// CHECK-INST: bfmls za.h[w8, 0, vgx2], { z12.h, z13.h }, z2.h
// CHECK-ENCODING: [0x88,0x1d,0x62,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1621d88 <unknown>
bfmls za.h[w8, 0], {z12.h - z13.h}, z2.h // 11000001-01100010-00011101-10001000
// CHECK-INST: bfmls za.h[w8, 0, vgx2], { z12.h, z13.h }, z2.h
// CHECK-ENCODING: [0x88,0x1d,0x62,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1621d88 <unknown>
bfmls za.h[w10, 1, vgx2], {z1.h, z2.h}, z10.h // 11000001-01101010-01011100-00101001
// CHECK-INST: bfmls za.h[w10, 1, vgx2], { z1.h, z2.h }, z10.h
// CHECK-ENCODING: [0x29,0x5c,0x6a,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c16a5c29 <unknown>
bfmls za.h[w10, 1], {z1.h - z2.h}, z10.h // 11000001-01101010-01011100-00101001
// CHECK-INST: bfmls za.h[w10, 1, vgx2], { z1.h, z2.h }, z10.h
// CHECK-ENCODING: [0x29,0x5c,0x6a,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c16a5c29 <unknown>
bfmls za.h[w8, 5, vgx2], {z22.h, z23.h}, z14.h // 11000001-01101110-00011110-11001101
// CHECK-INST: bfmls za.h[w8, 5, vgx2], { z22.h, z23.h }, z14.h
// CHECK-ENCODING: [0xcd,0x1e,0x6e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c16e1ecd <unknown>
bfmls za.h[w8, 5], {z22.h - z23.h}, z14.h // 11000001-01101110-00011110-11001101
// CHECK-INST: bfmls za.h[w8, 5, vgx2], { z22.h, z23.h }, z14.h
// CHECK-ENCODING: [0xcd,0x1e,0x6e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c16e1ecd <unknown>
bfmls za.h[w11, 2, vgx2], {z9.h, z10.h}, z1.h // 11000001-01100001-01111101-00101010
// CHECK-INST: bfmls za.h[w11, 2, vgx2], { z9.h, z10.h }, z1.h
// CHECK-ENCODING: [0x2a,0x7d,0x61,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1617d2a <unknown>
bfmls za.h[w11, 2], {z9.h - z10.h}, z1.h // 11000001-01100001-01111101-00101010
// CHECK-INST: bfmls za.h[w11, 2, vgx2], { z9.h, z10.h }, z1.h
// CHECK-ENCODING: [0x2a,0x7d,0x61,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1617d2a <unknown>
bfmls za.h[w9, 7, vgx2], {z12.h, z13.h}, z11.h // 11000001-01101011-00111101-10001111
// CHECK-INST: bfmls za.h[w9, 7, vgx2], { z12.h, z13.h }, z11.h
// CHECK-ENCODING: [0x8f,0x3d,0x6b,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c16b3d8f <unknown>
bfmls za.h[w9, 7], {z12.h - z13.h}, z11.h // 11000001-01101011-00111101-10001111
// CHECK-INST: bfmls za.h[w9, 7, vgx2], { z12.h, z13.h }, z11.h
// CHECK-ENCODING: [0x8f,0x3d,0x6b,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c16b3d8f <unknown>
bfmls za.h[w8, 0, vgx2], {z0.h, z1.h}, z0.h[0] // 11000001-00010000-00010000-00110000
// CHECK-INST: bfmls za.h[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0]
// CHECK-ENCODING: [0x30,0x10,0x10,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1101030 <unknown>
bfmls za.h[w8, 0], {z0.h - z1.h}, z0.h[0] // 11000001-00010000-00010000-00110000
// CHECK-INST: bfmls za.h[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0]
// CHECK-ENCODING: [0x30,0x10,0x10,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1101030 <unknown>
bfmls za.h[w10, 5, vgx2], {z10.h, z11.h}, z5.h[2] // 11000001-00010101-01010101-01110101
// CHECK-INST: bfmls za.h[w10, 5, vgx2], { z10.h, z11.h }, z5.h[2]
// CHECK-ENCODING: [0x75,0x55,0x15,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1155575 <unknown>
bfmls za.h[w10, 5], {z10.h - z11.h}, z5.h[2] // 11000001-00010101-01010101-01110101
// CHECK-INST: bfmls za.h[w10, 5, vgx2], { z10.h, z11.h }, z5.h[2]
// CHECK-ENCODING: [0x75,0x55,0x15,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1155575 <unknown>
bfmls za.h[w11, 7, vgx2], {z12.h, z13.h}, z8.h[6] // 11000001-00011000-01111101-10110111
// CHECK-INST: bfmls za.h[w11, 7, vgx2], { z12.h, z13.h }, z8.h[6]
// CHECK-ENCODING: [0xb7,0x7d,0x18,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1187db7 <unknown>
bfmls za.h[w11, 7], {z12.h - z13.h}, z8.h[6] // 11000001-00011000-01111101-10110111
// CHECK-INST: bfmls za.h[w11, 7, vgx2], { z12.h, z13.h }, z8.h[6]
// CHECK-ENCODING: [0xb7,0x7d,0x18,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1187db7 <unknown>
bfmls za.h[w11, 7, vgx2], {z30.h, z31.h}, z15.h[7] // 11000001-00011111-01111111-11111111
// CHECK-INST: bfmls za.h[w11, 7, vgx2], { z30.h, z31.h }, z15.h[7]
// CHECK-ENCODING: [0xff,0x7f,0x1f,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11f7fff <unknown>
bfmls za.h[w11, 7], {z30.h - z31.h}, z15.h[7] // 11000001-00011111-01111111-11111111
// CHECK-INST: bfmls za.h[w11, 7, vgx2], { z30.h, z31.h }, z15.h[7]
// CHECK-ENCODING: [0xff,0x7f,0x1f,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11f7fff <unknown>
bfmls za.h[w8, 5, vgx2], {z16.h, z17.h}, z0.h[6] // 11000001-00010000-00011110-00110101
// CHECK-INST: bfmls za.h[w8, 5, vgx2], { z16.h, z17.h }, z0.h[6]
// CHECK-ENCODING: [0x35,0x1e,0x10,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1101e35 <unknown>
bfmls za.h[w8, 5], {z16.h - z17.h}, z0.h[6] // 11000001-00010000-00011110-00110101
// CHECK-INST: bfmls za.h[w8, 5, vgx2], { z16.h, z17.h }, z0.h[6]
// CHECK-ENCODING: [0x35,0x1e,0x10,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1101e35 <unknown>
bfmls za.h[w8, 1, vgx2], {z0.h, z1.h}, z14.h[2] // 11000001-00011110-00010100-00110001
// CHECK-INST: bfmls za.h[w8, 1, vgx2], { z0.h, z1.h }, z14.h[2]
// CHECK-ENCODING: [0x31,0x14,0x1e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11e1431 <unknown>
bfmls za.h[w8, 1], {z0.h - z1.h}, z14.h[2] // 11000001-00011110-00010100-00110001
// CHECK-INST: bfmls za.h[w8, 1, vgx2], { z0.h, z1.h }, z14.h[2]
// CHECK-ENCODING: [0x31,0x14,0x1e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11e1431 <unknown>
bfmls za.h[w10, 0, vgx2], {z18.h, z19.h}, z4.h[3] // 11000001-00010100-01010110-01111000
// CHECK-INST: bfmls za.h[w10, 0, vgx2], { z18.h, z19.h }, z4.h[3]
// CHECK-ENCODING: [0x78,0x56,0x14,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1145678 <unknown>
bfmls za.h[w10, 0], {z18.h - z19.h}, z4.h[3] // 11000001-00010100-01010110-01111000
// CHECK-INST: bfmls za.h[w10, 0, vgx2], { z18.h, z19.h }, z4.h[3]
// CHECK-ENCODING: [0x78,0x56,0x14,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1145678 <unknown>
bfmls za.h[w8, 0, vgx2], {z12.h, z13.h}, z2.h[4] // 11000001-00010010-00011001-10110000
// CHECK-INST: bfmls za.h[w8, 0, vgx2], { z12.h, z13.h }, z2.h[4]
// CHECK-ENCODING: [0xb0,0x19,0x12,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11219b0 <unknown>
bfmls za.h[w8, 0], {z12.h - z13.h}, z2.h[4] // 11000001-00010010-00011001-10110000
// CHECK-INST: bfmls za.h[w8, 0, vgx2], { z12.h, z13.h }, z2.h[4]
// CHECK-ENCODING: [0xb0,0x19,0x12,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11219b0 <unknown>
bfmls za.h[w10, 1, vgx2], {z0.h, z1.h}, z10.h[4] // 11000001-00011010-01011000-00110001
// CHECK-INST: bfmls za.h[w10, 1, vgx2], { z0.h, z1.h }, z10.h[4]
// CHECK-ENCODING: [0x31,0x58,0x1a,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11a5831 <unknown>
bfmls za.h[w10, 1], {z0.h - z1.h}, z10.h[4] // 11000001-00011010-01011000-00110001
// CHECK-INST: bfmls za.h[w10, 1, vgx2], { z0.h, z1.h }, z10.h[4]
// CHECK-ENCODING: [0x31,0x58,0x1a,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11a5831 <unknown>
bfmls za.h[w8, 5, vgx2], {z22.h, z23.h}, z14.h[5] // 11000001-00011110-00011010-11111101
// CHECK-INST: bfmls za.h[w8, 5, vgx2], { z22.h, z23.h }, z14.h[5]
// CHECK-ENCODING: [0xfd,0x1a,0x1e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11e1afd <unknown>
bfmls za.h[w8, 5], {z22.h - z23.h}, z14.h[5] // 11000001-00011110-00011010-11111101
// CHECK-INST: bfmls za.h[w8, 5, vgx2], { z22.h, z23.h }, z14.h[5]
// CHECK-ENCODING: [0xfd,0x1a,0x1e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11e1afd <unknown>
bfmls za.h[w11, 2, vgx2], {z8.h, z9.h}, z1.h[2] // 11000001-00010001-01110101-00110010
// CHECK-INST: bfmls za.h[w11, 2, vgx2], { z8.h, z9.h }, z1.h[2]
// CHECK-ENCODING: [0x32,0x75,0x11,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1117532 <unknown>
bfmls za.h[w11, 2], {z8.h - z9.h}, z1.h[2] // 11000001-00010001-01110101-00110010
// CHECK-INST: bfmls za.h[w11, 2, vgx2], { z8.h, z9.h }, z1.h[2]
// CHECK-ENCODING: [0x32,0x75,0x11,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1117532 <unknown>
bfmls za.h[w9, 7, vgx2], {z12.h, z13.h}, z11.h[4] // 11000001-00011011-00111001-10110111
// CHECK-INST: bfmls za.h[w9, 7, vgx2], { z12.h, z13.h }, z11.h[4]
// CHECK-ENCODING: [0xb7,0x39,0x1b,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11b39b7 <unknown>
bfmls za.h[w9, 7], {z12.h - z13.h}, z11.h[4] // 11000001-00011011-00111001-10110111
// CHECK-INST: bfmls za.h[w9, 7, vgx2], { z12.h, z13.h }, z11.h[4]
// CHECK-ENCODING: [0xb7,0x39,0x1b,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11b39b7 <unknown>
bfmls za.h[w8, 0, vgx2], {z0.h, z1.h}, {z0.h, z1.h} // 11000001, 11100000-00010000-00011000
// CHECK-INST: bfmls za.h[w8, 0, vgx2], { z0.h, z1.h }, { z0.h, z1.h }
// CHECK-ENCODING: [0x18,0x10,0xe0,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e01018 <unknown>
bfmls za.h[w8, 0], {z0.h - z1.h}, {z0.h - z1.h} // 11000001-11100000-00010000-00011000
// CHECK-INST: bfmls za.h[w8, 0, vgx2], { z0.h, z1.h }, { z0.h, z1.h }
// CHECK-ENCODING: [0x18,0x10,0xe0,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e01018 <unknown>
bfmls za.h[w10, 5, vgx2], {z10.h, z11.h}, {z20.h, z21.h} // 11000001, 11110100-01010001-01011101
// CHECK-INST: bfmls za.h[w10, 5, vgx2], { z10.h, z11.h }, { z20.h, z21.h }
// CHECK-ENCODING: [0x5d,0x51,0xf4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f4515d <unknown>
bfmls za.h[w10, 5], {z10.h - z11.h}, {z20.h - z21.h} // 11000001-11110100-01010001-01011101
// CHECK-INST: bfmls za.h[w10, 5, vgx2], { z10.h, z11.h }, { z20.h, z21.h }
// CHECK-ENCODING: [0x5d,0x51,0xf4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f4515d <unknown>
bfmls za.h[w11, 7, vgx2], {z12.h, z13.h}, {z8.h, z9.h} // 11000001, 11101000-01110001-10011111
// CHECK-INST: bfmls za.h[w11, 7, vgx2], { z12.h, z13.h }, { z8.h, z9.h }
// CHECK-ENCODING: [0x9f,0x71,0xe8,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e8719f <unknown>
bfmls za.h[w11, 7], {z12.h - z13.h}, {z8.h - z9.h} // 11000001-11101000-01110001-10011111
// CHECK-INST: bfmls za.h[w11, 7, vgx2], { z12.h, z13.h }, { z8.h, z9.h }
// CHECK-ENCODING: [0x9f,0x71,0xe8,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e8719f <unknown>
bfmls za.h[w11, 7, vgx2], {z30.h, z31.h}, {z30.h, z31.h} // 11000001, 11111110-01110011-11011111
// CHECK-INST: bfmls za.h[w11, 7, vgx2], { z30.h, z31.h }, { z30.h, z31.h }
// CHECK-ENCODING: [0xdf,0x73,0xfe,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fe73df <unknown>
bfmls za.h[w11, 7], {z30.h - z31.h}, {z30.h - z31.h} // 11000001-11111110-01110011-11011111
// CHECK-INST: bfmls za.h[w11, 7, vgx2], { z30.h, z31.h }, { z30.h, z31.h }
// CHECK-ENCODING: [0xdf,0x73,0xfe,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fe73df <unknown>
bfmls za.h[w8, 5, vgx2], {z16.h, z17.h}, {z16.h, z17.h} // 11000001, 11110000-00010010-00011101
// CHECK-INST: bfmls za.h[w8, 5, vgx2], { z16.h, z17.h }, { z16.h, z17.h }
// CHECK-ENCODING: [0x1d,0x12,0xf0,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f0121d <unknown>
bfmls za.h[w8, 5], {z16.h - z17.h}, {z16.h - z17.h} // 11000001-11110000-00010010-00011101
// CHECK-INST: bfmls za.h[w8, 5, vgx2], { z16.h, z17.h }, { z16.h, z17.h }
// CHECK-ENCODING: [0x1d,0x12,0xf0,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f0121d <unknown>
bfmls za.h[w8, 1, vgx2], {z0.h, z1.h}, {z30.h, z31.h} // 11000001, 11111110-00010000-00011001
// CHECK-INST: bfmls za.h[w8, 1, vgx2], { z0.h, z1.h }, { z30.h, z31.h }
// CHECK-ENCODING: [0x19,0x10,0xfe,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fe1019 <unknown>
bfmls za.h[w8, 1], {z0.h - z1.h}, {z30.h - z31.h} // 11000001-11111110-00010000-00011001
// CHECK-INST: bfmls za.h[w8, 1, vgx2], { z0.h, z1.h }, { z30.h, z31.h }
// CHECK-ENCODING: [0x19,0x10,0xfe,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fe1019 <unknown>
bfmls za.h[w10, 0, vgx2], {z18.h, z19.h}, {z20.h, z21.h} // 11000001, 11110100-01010010-01011000
// CHECK-INST: bfmls za.h[w10, 0, vgx2], { z18.h, z19.h }, { z20.h, z21.h }
// CHECK-ENCODING: [0x58,0x52,0xf4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f45258 <unknown>
bfmls za.h[w10, 0], {z18.h - z19.h}, {z20.h - z21.h} // 11000001-11110100-01010010-01011000
// CHECK-INST: bfmls za.h[w10, 0, vgx2], { z18.h, z19.h }, { z20.h, z21.h }
// CHECK-ENCODING: [0x58,0x52,0xf4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f45258 <unknown>
bfmls za.h[w8, 0, vgx2], {z12.h, z13.h}, {z2.h, z3.h} // 11000001, 11100010-00010001-10011000
// CHECK-INST: bfmls za.h[w8, 0, vgx2], { z12.h, z13.h }, { z2.h, z3.h }
// CHECK-ENCODING: [0x98,0x11,0xe2,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e21198 <unknown>
bfmls za.h[w8, 0], {z12.h - z13.h}, {z2.h - z3.h} // 11000001-11100010-00010001-10011000
// CHECK-INST: bfmls za.h[w8, 0, vgx2], { z12.h, z13.h }, { z2.h, z3.h }
// CHECK-ENCODING: [0x98,0x11,0xe2,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e21198 <unknown>
bfmls za.h[w10, 1, vgx2], {z0.h, z1.h}, {z26.h, z27.h} // 11000001, 11111010-01010000-00011001
// CHECK-INST: bfmls za.h[w10, 1, vgx2], { z0.h, z1.h }, { z26.h, z27.h }
// CHECK-ENCODING: [0x19,0x50,0xfa,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fa5019 <unknown>
bfmls za.h[w10, 1], {z0.h - z1.h}, {z26.h - z27.h} // 11000001-11111010-01010000-00011001
// CHECK-INST: bfmls za.h[w10, 1, vgx2], { z0.h, z1.h }, { z26.h, z27.h }
// CHECK-ENCODING: [0x19,0x50,0xfa,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fa5019 <unknown>
bfmls za.h[w8, 5, vgx2], {z22.h, z23.h}, {z30.h, z31.h} // 11000001, 11111110-00010010-11011101
// CHECK-INST: bfmls za.h[w8, 5, vgx2], { z22.h, z23.h }, { z30.h, z31.h }
// CHECK-ENCODING: [0xdd,0x12,0xfe,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fe12dd <unknown>
bfmls za.h[w8, 5], {z22.h - z23.h}, {z30.h - z31.h} // 11000001-11111110-00010010-11011101
// CHECK-INST: bfmls za.h[w8, 5, vgx2], { z22.h, z23.h }, { z30.h, z31.h }
// CHECK-ENCODING: [0xdd,0x12,0xfe,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fe12dd <unknown>
bfmls za.h[w11, 2, vgx2], {z8.h, z9.h}, {z0.h, z1.h} // 11000001, 11100000-01110001-00011010
// CHECK-INST: bfmls za.h[w11, 2, vgx2], { z8.h, z9.h }, { z0.h, z1.h }
// CHECK-ENCODING: [0x1a,0x71,0xe0,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e0711a <unknown>
bfmls za.h[w11, 2], {z8.h - z9.h}, {z0.h - z1.h} // 11000001-11100000-01110001-00011010
// CHECK-INST: bfmls za.h[w11, 2, vgx2], { z8.h, z9.h }, { z0.h, z1.h }
// CHECK-ENCODING: [0x1a,0x71,0xe0,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e0711a <unknown>
bfmls za.h[w9, 7, vgx2], {z12.h, z13.h}, {z10.h, z11.h} // 11000001, 11101010-00110001-10011111
// CHECK-INST: bfmls za.h[w9, 7, vgx2], { z12.h, z13.h }, { z10.h, z11.h }
// CHECK-ENCODING: [0x9f,0x31,0xea,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1ea319f <unknown>
bfmls za.h[w9, 7], {z12.h - z13.h}, {z10.h - z11.h} // 11000001-11101010-00110001-10011111
// CHECK-INST: bfmls za.h[w9, 7, vgx2], { z12.h, z13.h }, { z10.h, z11.h }
// CHECK-ENCODING: [0x9f,0x31,0xea,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1ea319f <unknown>
bfmls za.h[w8, 0, vgx4], {z0.h - z3.h}, z0.h // 11000001-01110000-00011100-00001000
// CHECK-INST: bfmls za.h[w8, 0, vgx4], { z0.h - z3.h }, z0.h
// CHECK-ENCODING: [0x08,0x1c,0x70,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1701c08 <unknown>
bfmls za.h[w8, 0], {z0.h - z3.h}, z0.h // 11000001-01110000-00011100-00001000
// CHECK-INST: bfmls za.h[w8, 0, vgx4], { z0.h - z3.h }, z0.h
// CHECK-ENCODING: [0x08,0x1c,0x70,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1701c08 <unknown>
bfmls za.h[w10, 5, vgx4], {z10.h - z13.h}, z5.h // 11000001-01110101-01011101-01001101
// CHECK-INST: bfmls za.h[w10, 5, vgx4], { z10.h - z13.h }, z5.h
// CHECK-ENCODING: [0x4d,0x5d,0x75,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1755d4d <unknown>
bfmls za.h[w10, 5], {z10.h - z13.h}, z5.h // 11000001-01110101-01011101-01001101
// CHECK-INST: bfmls za.h[w10, 5, vgx4], { z10.h - z13.h }, z5.h
// CHECK-ENCODING: [0x4d,0x5d,0x75,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1755d4d <unknown>
bfmls za.h[w11, 7, vgx4], {z13.h - z16.h}, z8.h // 11000001-01111000-01111101-10101111
// CHECK-INST: bfmls za.h[w11, 7, vgx4], { z13.h - z16.h }, z8.h
// CHECK-ENCODING: [0xaf,0x7d,0x78,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1787daf <unknown>
bfmls za.h[w11, 7], {z13.h - z16.h}, z8.h // 11000001-01111000-01111101-10101111
// CHECK-INST: bfmls za.h[w11, 7, vgx4], { z13.h - z16.h }, z8.h
// CHECK-ENCODING: [0xaf,0x7d,0x78,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1787daf <unknown>
bfmls za.h[w11, 7, vgx4], {z31.h, z0.h, z1.h, z2.h}, z15.h // 11000001-01111111-01111111-11101111
// CHECK-INST: bfmls za.h[w11, 7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h
// CHECK-ENCODING: [0xef,0x7f,0x7f,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c17f7fef <unknown>
bfmls za.h[w11, 7], {z31.h, z0.h, z1.h, z2.h}, z15.h // 11000001-01111111-01111111-11101111
// CHECK-INST: bfmls za.h[w11, 7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h
// CHECK-ENCODING: [0xef,0x7f,0x7f,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c17f7fef <unknown>
bfmls za.h[w8, 5, vgx4], {z17.h - z20.h}, z0.h // 11000001-01110000-00011110-00101101
// CHECK-INST: bfmls za.h[w8, 5, vgx4], { z17.h - z20.h }, z0.h
// CHECK-ENCODING: [0x2d,0x1e,0x70,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1701e2d <unknown>
bfmls za.h[w8, 5], {z17.h - z20.h}, z0.h // 11000001-01110000-00011110-00101101
// CHECK-INST: bfmls za.h[w8, 5, vgx4], { z17.h - z20.h }, z0.h
// CHECK-ENCODING: [0x2d,0x1e,0x70,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1701e2d <unknown>
bfmls za.h[w8, 1, vgx4], {z1.h - z4.h}, z14.h // 11000001-01111110-00011100-00101001
// CHECK-INST: bfmls za.h[w8, 1, vgx4], { z1.h - z4.h }, z14.h
// CHECK-ENCODING: [0x29,0x1c,0x7e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c17e1c29 <unknown>
bfmls za.h[w8, 1], {z1.h - z4.h}, z14.h // 11000001-01111110-00011100-00101001
// CHECK-INST: bfmls za.h[w8, 1, vgx4], { z1.h - z4.h }, z14.h
// CHECK-ENCODING: [0x29,0x1c,0x7e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c17e1c29 <unknown>
bfmls za.h[w10, 0, vgx4], {z19.h - z22.h}, z4.h // 11000001-01110100-01011110-01101000
// CHECK-INST: bfmls za.h[w10, 0, vgx4], { z19.h - z22.h }, z4.h
// CHECK-ENCODING: [0x68,0x5e,0x74,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1745e68 <unknown>
bfmls za.h[w10, 0], {z19.h - z22.h}, z4.h // 11000001-01110100-01011110-01101000
// CHECK-INST: bfmls za.h[w10, 0, vgx4], { z19.h - z22.h }, z4.h
// CHECK-ENCODING: [0x68,0x5e,0x74,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1745e68 <unknown>
bfmls za.h[w8, 0, vgx4], {z12.h - z15.h}, z2.h // 11000001-01110010-00011101-10001000
// CHECK-INST: bfmls za.h[w8, 0, vgx4], { z12.h - z15.h }, z2.h
// CHECK-ENCODING: [0x88,0x1d,0x72,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1721d88 <unknown>
bfmls za.h[w8, 0], {z12.h - z15.h}, z2.h // 11000001-01110010-00011101-10001000
// CHECK-INST: bfmls za.h[w8, 0, vgx4], { z12.h - z15.h }, z2.h
// CHECK-ENCODING: [0x88,0x1d,0x72,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1721d88 <unknown>
bfmls za.h[w10, 1, vgx4], {z1.h - z4.h}, z10.h // 11000001-01111010-01011100-00101001
// CHECK-INST: bfmls za.h[w10, 1, vgx4], { z1.h - z4.h }, z10.h
// CHECK-ENCODING: [0x29,0x5c,0x7a,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c17a5c29 <unknown>
bfmls za.h[w10, 1], {z1.h - z4.h}, z10.h // 11000001-01111010-01011100-00101001
// CHECK-INST: bfmls za.h[w10, 1, vgx4], { z1.h - z4.h }, z10.h
// CHECK-ENCODING: [0x29,0x5c,0x7a,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c17a5c29 <unknown>
bfmls za.h[w8, 5, vgx4], {z22.h - z25.h}, z14.h // 11000001-01111110-00011110-11001101
// CHECK-INST: bfmls za.h[w8, 5, vgx4], { z22.h - z25.h }, z14.h
// CHECK-ENCODING: [0xcd,0x1e,0x7e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c17e1ecd <unknown>
bfmls za.h[w8, 5], {z22.h - z25.h}, z14.h // 11000001-01111110-00011110-11001101
// CHECK-INST: bfmls za.h[w8, 5, vgx4], { z22.h - z25.h }, z14.h
// CHECK-ENCODING: [0xcd,0x1e,0x7e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c17e1ecd <unknown>
bfmls za.h[w11, 2, vgx4], {z9.h - z12.h}, z1.h // 11000001-01110001-01111101-00101010
// CHECK-INST: bfmls za.h[w11, 2, vgx4], { z9.h - z12.h }, z1.h
// CHECK-ENCODING: [0x2a,0x7d,0x71,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1717d2a <unknown>
bfmls za.h[w11, 2], {z9.h - z12.h}, z1.h // 11000001-01110001-01111101-00101010
// CHECK-INST: bfmls za.h[w11, 2, vgx4], { z9.h - z12.h }, z1.h
// CHECK-ENCODING: [0x2a,0x7d,0x71,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1717d2a <unknown>
bfmls za.h[w9, 7, vgx4], {z12.h - z15.h}, z11.h // 11000001-01111011-00111101-10001111
// CHECK-INST: bfmls za.h[w9, 7, vgx4], { z12.h - z15.h }, z11.h
// CHECK-ENCODING: [0x8f,0x3d,0x7b,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c17b3d8f <unknown>
bfmls za.h[w9, 7], {z12.h - z15.h}, z11.h // 11000001-01111011-00111101-10001111
// CHECK-INST: bfmls za.h[w9, 7, vgx4], { z12.h - z15.h }, z11.h
// CHECK-ENCODING: [0x8f,0x3d,0x7b,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c17b3d8f <unknown>
bfmls za.h[w8, 0, vgx4], {z0.h - z3.h}, z0.h[0] // 11000001-00010000-10010000-00110000
// CHECK-INST: bfmls za.h[w8, 0, vgx4], { z0.h - z3.h }, z0.h[0]
// CHECK-ENCODING: [0x30,0x90,0x10,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1109030 <unknown>
bfmls za.h[w8, 0], {z0.h - z3.h}, z0.h[0] // 11000001-00010000-10010000-00110000
// CHECK-INST: bfmls za.h[w8, 0, vgx4], { z0.h - z3.h }, z0.h[0]
// CHECK-ENCODING: [0x30,0x90,0x10,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1109030 <unknown>
bfmls za.h[w10, 5, vgx4], {z8.h - z11.h}, z5.h[2] // 11000001-00010101-11010101-00110101
// CHECK-INST: bfmls za.h[w10, 5, vgx4], { z8.h - z11.h }, z5.h[2]
// CHECK-ENCODING: [0x35,0xd5,0x15,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c115d535 <unknown>
bfmls za.h[w10, 5], {z8.h - z11.h}, z5.h[2] // 11000001-00010101-11010101-00110101
// CHECK-INST: bfmls za.h[w10, 5, vgx4], { z8.h - z11.h }, z5.h[2]
// CHECK-ENCODING: [0x35,0xd5,0x15,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c115d535 <unknown>
bfmls za.h[w11, 7, vgx4], {z12.h - z15.h}, z8.h[6] // 11000001-00011000-11111101-10110111
// CHECK-INST: bfmls za.h[w11, 7, vgx4], { z12.h - z15.h }, z8.h[6]
// CHECK-ENCODING: [0xb7,0xfd,0x18,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c118fdb7 <unknown>
bfmls za.h[w11, 7], {z12.h - z15.h}, z8.h[6] // 11000001-00011000-11111101-10110111
// CHECK-INST: bfmls za.h[w11, 7, vgx4], { z12.h - z15.h }, z8.h[6]
// CHECK-ENCODING: [0xb7,0xfd,0x18,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c118fdb7 <unknown>
bfmls za.h[w11, 7, vgx4], {z28.h - z31.h}, z15.h[7] // 11000001-00011111-11111111-10111111
// CHECK-INST: bfmls za.h[w11, 7, vgx4], { z28.h - z31.h }, z15.h[7]
// CHECK-ENCODING: [0xbf,0xff,0x1f,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11fffbf <unknown>
bfmls za.h[w11, 7], {z28.h - z31.h}, z15.h[7] // 11000001-00011111-11111111-10111111
// CHECK-INST: bfmls za.h[w11, 7, vgx4], { z28.h - z31.h }, z15.h[7]
// CHECK-ENCODING: [0xbf,0xff,0x1f,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11fffbf <unknown>
bfmls za.h[w8, 5, vgx4], {z16.h - z19.h}, z0.h[6] // 11000001-00010000-10011110-00110101
// CHECK-INST: bfmls za.h[w8, 5, vgx4], { z16.h - z19.h }, z0.h[6]
// CHECK-ENCODING: [0x35,0x9e,0x10,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1109e35 <unknown>
bfmls za.h[w8, 5], {z16.h - z19.h}, z0.h[6] // 11000001-00010000-10011110-00110101
// CHECK-INST: bfmls za.h[w8, 5, vgx4], { z16.h - z19.h }, z0.h[6]
// CHECK-ENCODING: [0x35,0x9e,0x10,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1109e35 <unknown>
bfmls za.h[w8, 1, vgx4], {z0.h - z3.h}, z14.h[2] // 11000001-00011110-10010100-00110001
// CHECK-INST: bfmls za.h[w8, 1, vgx4], { z0.h - z3.h }, z14.h[2]
// CHECK-ENCODING: [0x31,0x94,0x1e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11e9431 <unknown>
bfmls za.h[w8, 1], {z0.h - z3.h}, z14.h[2] // 11000001-00011110-10010100-00110001
// CHECK-INST: bfmls za.h[w8, 1, vgx4], { z0.h - z3.h }, z14.h[2]
// CHECK-ENCODING: [0x31,0x94,0x1e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11e9431 <unknown>
bfmls za.h[w10, 0, vgx4], {z16.h - z19.h}, z4.h[3] // 11000001-00010100-11010110-00111000
// CHECK-INST: bfmls za.h[w10, 0, vgx4], { z16.h - z19.h }, z4.h[3]
// CHECK-ENCODING: [0x38,0xd6,0x14,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c114d638 <unknown>
bfmls za.h[w10, 0], {z16.h - z19.h}, z4.h[3] // 11000001-00010100-11010110-00111000
// CHECK-INST: bfmls za.h[w10, 0, vgx4], { z16.h - z19.h }, z4.h[3]
// CHECK-ENCODING: [0x38,0xd6,0x14,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c114d638 <unknown>
bfmls za.h[w8, 0, vgx4], {z12.h - z15.h}, z2.h[4] // 11000001-00010010-10011001-10110000
// CHECK-INST: bfmls za.h[w8, 0, vgx4], { z12.h - z15.h }, z2.h[4]
// CHECK-ENCODING: [0xb0,0x99,0x12,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11299b0 <unknown>
bfmls za.h[w8, 0], {z12.h - z15.h}, z2.h[4] // 11000001-00010010-10011001-10110000
// CHECK-INST: bfmls za.h[w8, 0, vgx4], { z12.h - z15.h }, z2.h[4]
// CHECK-ENCODING: [0xb0,0x99,0x12,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11299b0 <unknown>
bfmls za.h[w10, 1, vgx4], {z0.h - z3.h}, z10.h[4] // 11000001-00011010-11011000-00110001
// CHECK-INST: bfmls za.h[w10, 1, vgx4], { z0.h - z3.h }, z10.h[4]
// CHECK-ENCODING: [0x31,0xd8,0x1a,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11ad831 <unknown>
bfmls za.h[w10, 1], {z0.h - z3.h}, z10.h[4] // 11000001-00011010-11011000-00110001
// CHECK-INST: bfmls za.h[w10, 1, vgx4], { z0.h - z3.h }, z10.h[4]
// CHECK-ENCODING: [0x31,0xd8,0x1a,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11ad831 <unknown>
bfmls za.h[w8, 5, vgx4], {z20.h - z23.h}, z14.h[5] // 11000001-00011110-10011010-10111101
// CHECK-INST: bfmls za.h[w8, 5, vgx4], { z20.h - z23.h }, z14.h[5]
// CHECK-ENCODING: [0xbd,0x9a,0x1e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11e9abd <unknown>
bfmls za.h[w8, 5], {z20.h - z23.h}, z14.h[5] // 11000001-00011110-10011010-10111101
// CHECK-INST: bfmls za.h[w8, 5, vgx4], { z20.h - z23.h }, z14.h[5]
// CHECK-ENCODING: [0xbd,0x9a,0x1e,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11e9abd <unknown>
bfmls za.h[w11, 2, vgx4], {z8.h - z11.h}, z1.h[2] // 11000001-00010001-11110101-00110010
// CHECK-INST: bfmls za.h[w11, 2, vgx4], { z8.h - z11.h }, z1.h[2]
// CHECK-ENCODING: [0x32,0xf5,0x11,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c111f532 <unknown>
bfmls za.h[w11, 2], {z8.h - z11.h}, z1.h[2] // 11000001-00010001-11110101-00110010
// CHECK-INST: bfmls za.h[w11, 2, vgx4], { z8.h - z11.h }, z1.h[2]
// CHECK-ENCODING: [0x32,0xf5,0x11,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c111f532 <unknown>
bfmls za.h[w9, 7, vgx4], {z12.h - z15.h}, z11.h[4] // 11000001-00011011-10111001-10110111
// CHECK-INST: bfmls za.h[w9, 7, vgx4], { z12.h - z15.h }, z11.h[4]
// CHECK-ENCODING: [0xb7,0xb9,0x1b,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11bb9b7 <unknown>
bfmls za.h[w9, 7], {z12.h - z15.h}, z11.h[4] // 11000001-00011011-10111001-10110111
// CHECK-INST: bfmls za.h[w9, 7, vgx4], { z12.h - z15.h }, z11.h[4]
// CHECK-ENCODING: [0xb7,0xb9,0x1b,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c11bb9b7 <unknown>
bfmls za.h[w8, 0, vgx4], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00010000-00011000
// CHECK-INST: bfmls za.h[w8, 0, vgx4], { z0.h - z3.h }, { z0.h - z3.h }
// CHECK-ENCODING: [0x18,0x10,0xe1,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e11018 <unknown>
bfmls za.h[w8, 0], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00010000-00011000
// CHECK-INST: bfmls za.h[w8, 0, vgx4], { z0.h - z3.h }, { z0.h - z3.h }
// CHECK-ENCODING: [0x18,0x10,0xe1,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e11018 <unknown>
bfmls za.h[w10, 5, vgx4], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01010001-00011101
// CHECK-INST: bfmls za.h[w10, 5, vgx4], { z8.h - z11.h }, { z20.h - z23.h }
// CHECK-ENCODING: [0x1d,0x51,0xf5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f5511d <unknown>
bfmls za.h[w10, 5], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01010001-00011101
// CHECK-INST: bfmls za.h[w10, 5, vgx4], { z8.h - z11.h }, { z20.h - z23.h }
// CHECK-ENCODING: [0x1d,0x51,0xf5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f5511d <unknown>
bfmls za.h[w11, 7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01110001-10011111
// CHECK-INST: bfmls za.h[w11, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h }
// CHECK-ENCODING: [0x9f,0x71,0xe9,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e9719f <unknown>
bfmls za.h[w11, 7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01110001-10011111
// CHECK-INST: bfmls za.h[w11, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h }
// CHECK-ENCODING: [0x9f,0x71,0xe9,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e9719f <unknown>
bfmls za.h[w11, 7, vgx4], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01110011-10011111
// CHECK-INST: bfmls za.h[w11, 7, vgx4], { z28.h - z31.h }, { z28.h - z31.h }
// CHECK-ENCODING: [0x9f,0x73,0xfd,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fd739f <unknown>
bfmls za.h[w11, 7], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01110011-10011111
// CHECK-INST: bfmls za.h[w11, 7, vgx4], { z28.h - z31.h }, { z28.h - z31.h }
// CHECK-ENCODING: [0x9f,0x73,0xfd,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fd739f <unknown>
bfmls za.h[w8, 5, vgx4], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00010010-00011101
// CHECK-INST: bfmls za.h[w8, 5, vgx4], { z16.h - z19.h }, { z16.h - z19.h }
// CHECK-ENCODING: [0x1d,0x12,0xf1,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f1121d <unknown>
bfmls za.h[w8, 5], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00010010-00011101
// CHECK-INST: bfmls za.h[w8, 5, vgx4], { z16.h - z19.h }, { z16.h - z19.h }
// CHECK-ENCODING: [0x1d,0x12,0xf1,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f1121d <unknown>
bfmls za.h[w8, 1, vgx4], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00010000-00011001
// CHECK-INST: bfmls za.h[w8, 1, vgx4], { z0.h - z3.h }, { z28.h - z31.h }
// CHECK-ENCODING: [0x19,0x10,0xfd,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fd1019 <unknown>
bfmls za.h[w8, 1], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00010000-00011001
// CHECK-INST: bfmls za.h[w8, 1, vgx4], { z0.h - z3.h }, { z28.h - z31.h }
// CHECK-ENCODING: [0x19,0x10,0xfd,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fd1019 <unknown>
bfmls za.h[w10, 0, vgx4], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01010010-00011000
// CHECK-INST: bfmls za.h[w10, 0, vgx4], { z16.h - z19.h }, { z20.h - z23.h }
// CHECK-ENCODING: [0x18,0x52,0xf5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f55218 <unknown>
bfmls za.h[w10, 0], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01010010-00011000
// CHECK-INST: bfmls za.h[w10, 0, vgx4], { z16.h - z19.h }, { z20.h - z23.h }
// CHECK-ENCODING: [0x18,0x52,0xf5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f55218 <unknown>
bfmls za.h[w8, 0, vgx4], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00010001-10011000
// CHECK-INST: bfmls za.h[w8, 0, vgx4], { z12.h - z15.h }, { z0.h - z3.h }
// CHECK-ENCODING: [0x98,0x11,0xe1,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e11198 <unknown>
bfmls za.h[w8, 0], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00010001-10011000
// CHECK-INST: bfmls za.h[w8, 0, vgx4], { z12.h - z15.h }, { z0.h - z3.h }
// CHECK-ENCODING: [0x98,0x11,0xe1,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e11198 <unknown>
bfmls za.h[w10, 1, vgx4], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01010000-00011001
// CHECK-INST: bfmls za.h[w10, 1, vgx4], { z0.h - z3.h }, { z24.h - z27.h }
// CHECK-ENCODING: [0x19,0x50,0xf9,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f95019 <unknown>
bfmls za.h[w10, 1], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01010000-00011001
// CHECK-INST: bfmls za.h[w10, 1, vgx4], { z0.h - z3.h }, { z24.h - z27.h }
// CHECK-ENCODING: [0x19,0x50,0xf9,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1f95019 <unknown>
bfmls za.h[w8, 5, vgx4], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00010010-10011101
// CHECK-INST: bfmls za.h[w8, 5, vgx4], { z20.h - z23.h }, { z28.h - z31.h }
// CHECK-ENCODING: [0x9d,0x12,0xfd,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fd129d <unknown>
bfmls za.h[w8, 5], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00010010-10011101
// CHECK-INST: bfmls za.h[w8, 5, vgx4], { z20.h - z23.h }, { z28.h - z31.h }
// CHECK-ENCODING: [0x9d,0x12,0xfd,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1fd129d <unknown>
bfmls za.h[w11, 2, vgx4], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01110001-00011010
// CHECK-INST: bfmls za.h[w11, 2, vgx4], { z8.h - z11.h }, { z0.h - z3.h }
// CHECK-ENCODING: [0x1a,0x71,0xe1,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e1711a <unknown>
bfmls za.h[w11, 2], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01110001-00011010
// CHECK-INST: bfmls za.h[w11, 2, vgx4], { z8.h - z11.h }, { z0.h - z3.h }
// CHECK-ENCODING: [0x1a,0x71,0xe1,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e1711a <unknown>
bfmls za.h[w9, 7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00110001-10011111
// CHECK-INST: bfmls za.h[w9, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h }
// CHECK-ENCODING: [0x9f,0x31,0xe9,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e9319f <unknown>
bfmls za.h[w9, 7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00110001-10011111
// CHECK-INST: bfmls za.h[w9, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h }
// CHECK-ENCODING: [0x9f,0x31,0xe9,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e9319f <unknown>
diff --git a/llvm/test/MC/AArch64/SME2p1/bfmopa-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfmopa-diagnostics.s
index 8b418f4..e6d208d 100644
--- a/llvm/test/MC/AArch64/SME2p1/bfmopa-diagnostics.s
+++ b/llvm/test/MC/AArch64/SME2/bfmopa-diagnostics.s
@@ -1,4 +1,4 @@
-// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 2>&1 < %s | FileCheck %s
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 2>&1 < %s | FileCheck %s
// --------------------------------------------------------------------------//
// Invalid predicate register
diff --git a/llvm/test/MC/AArch64/SME2p1/bfmopa.s b/llvm/test/MC/AArch64/SME2/bfmopa.s
index 7a08185..2a9b1e5 100644
--- a/llvm/test/MC/AArch64/SME2p1/bfmopa.s
+++ b/llvm/test/MC/AArch64/SME2/bfmopa.s
@@ -1,84 +1,84 @@
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \
-// RUN: | llvm-objdump -d --mattr=+sme2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \
-// RUN: | llvm-objdump -d --mattr=-sme2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN: | llvm-objdump -d --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
-// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p1,+b16b16 -disassemble -show-encoding \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
bfmopa za0.h, p0/m, p0/m, z0.h, z0.h // 10000001-10100000-00000000-00001000
// CHECK-INST: bfmopa za0.h, p0/m, p0/m, z0.h, z0.h
// CHECK-ENCODING: [0x08,0x00,0xa0,0x81]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: 81a00008 <unknown>
bfmopa za1.h, p5/m, p2/m, z10.h, z21.h // 10000001-10110101-01010101-01001001
// CHECK-INST: bfmopa za1.h, p5/m, p2/m, z10.h, z21.h
// CHECK-ENCODING: [0x49,0x55,0xb5,0x81]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: 81b55549 <unknown>
bfmopa za1.h, p3/m, p7/m, z13.h, z8.h // 10000001-10101000-11101101-10101001
// CHECK-INST: bfmopa za1.h, p3/m, p7/m, z13.h, z8.h
// CHECK-ENCODING: [0xa9,0xed,0xa8,0x81]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: 81a8eda9 <unknown>
bfmopa za1.h, p7/m, p7/m, z31.h, z31.h // 10000001-10111111-11111111-11101001
// CHECK-INST: bfmopa za1.h, p7/m, p7/m, z31.h, z31.h
// CHECK-ENCODING: [0xe9,0xff,0xbf,0x81]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: 81bfffe9 <unknown>
bfmopa za1.h, p3/m, p0/m, z17.h, z16.h // 10000001-10110000-00001110-00101001
// CHECK-INST: bfmopa za1.h, p3/m, p0/m, z17.h, z16.h
// CHECK-ENCODING: [0x29,0x0e,0xb0,0x81]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: 81b00e29 <unknown>
bfmopa za1.h, p1/m, p4/m, z1.h, z30.h // 10000001-10111110-10000100-00101001
// CHECK-INST: bfmopa za1.h, p1/m, p4/m, z1.h, z30.h
// CHECK-ENCODING: [0x29,0x84,0xbe,0x81]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: 81be8429 <unknown>
bfmopa za0.h, p5/m, p2/m, z19.h, z20.h // 10000001-10110100-01010110-01101000
// CHECK-INST: bfmopa za0.h, p5/m, p2/m, z19.h, z20.h
// CHECK-ENCODING: [0x68,0x56,0xb4,0x81]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: 81b45668 <unknown>
bfmopa za0.h, p6/m, p0/m, z12.h, z2.h // 10000001-10100010-00011001-10001000
// CHECK-INST: bfmopa za0.h, p6/m, p0/m, z12.h, z2.h
// CHECK-ENCODING: [0x88,0x19,0xa2,0x81]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: 81a21988 <unknown>
bfmopa za1.h, p2/m, p6/m, z1.h, z26.h // 10000001-10111010-11001000-00101001
// CHECK-INST: bfmopa za1.h, p2/m, p6/m, z1.h, z26.h
// CHECK-ENCODING: [0x29,0xc8,0xba,0x81]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: 81bac829 <unknown>
bfmopa za1.h, p2/m, p0/m, z22.h, z30.h // 10000001-10111110-00001010-11001001
// CHECK-INST: bfmopa za1.h, p2/m, p0/m, z22.h, z30.h
// CHECK-ENCODING: [0xc9,0x0a,0xbe,0x81]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: 81be0ac9 <unknown>
bfmopa za0.h, p5/m, p7/m, z9.h, z1.h // 10000001-10100001-11110101-00101000
// CHECK-INST: bfmopa za0.h, p5/m, p7/m, z9.h, z1.h
// CHECK-ENCODING: [0x28,0xf5,0xa1,0x81]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: 81a1f528 <unknown>
bfmopa za1.h, p2/m, p5/m, z12.h, z11.h // 10000001-10101011-10101001-10001001
// CHECK-INST: bfmopa za1.h, p2/m, p5/m, z12.h, z11.h
// CHECK-ENCODING: [0x89,0xa9,0xab,0x81]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: 81aba989 <unknown>
diff --git a/llvm/test/MC/AArch64/SME2p1/bfmops-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfmops-diagnostics.s
index 84275af..14c25de 100644
--- a/llvm/test/MC/AArch64/SME2p1/bfmops-diagnostics.s
+++ b/llvm/test/MC/AArch64/SME2/bfmops-diagnostics.s
@@ -1,4 +1,4 @@
-// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 2>&1 < %s | FileCheck %s
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 2>&1 < %s | FileCheck %s
// --------------------------------------------------------------------------//
// Invalid predicate register
diff --git a/llvm/test/MC/AArch64/SME2p1/bfmops.s b/llvm/test/MC/AArch64/SME2/bfmops.s
index 380c65f..2b76986 100644
--- a/llvm/test/MC/AArch64/SME2p1/bfmops.s
+++ b/llvm/test/MC/AArch64/SME2/bfmops.s
@@ -1,84 +1,84 @@
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \
-// RUN: | llvm-objdump -d --mattr=+sme2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \
-// RUN: | llvm-objdump -d --mattr=-sme2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN: | llvm-objdump -d --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
-// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p1,+b16b16 -disassemble -show-encoding \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
bfmops za0.h, p0/m, p0/m, z0.h, z0.h // 10000001-10100000-00000000-00011000
// CHECK-INST: bfmops za0.h, p0/m, p0/m, z0.h, z0.h
// CHECK-ENCODING: [0x18,0x00,0xa0,0x81]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: 81a00018 <unknown>
bfmops za1.h, p5/m, p2/m, z10.h, z21.h // 10000001-10110101-01010101-01011001
// CHECK-INST: bfmops za1.h, p5/m, p2/m, z10.h, z21.h
// CHECK-ENCODING: [0x59,0x55,0xb5,0x81]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: 81b55559 <unknown>
bfmops za1.h, p3/m, p7/m, z13.h, z8.h // 10000001-10101000-11101101-10111001
// CHECK-INST: bfmops za1.h, p3/m, p7/m, z13.h, z8.h
// CHECK-ENCODING: [0xb9,0xed,0xa8,0x81]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: 81a8edb9 <unknown>
bfmops za1.h, p7/m, p7/m, z31.h, z31.h // 10000001-10111111-11111111-11111001
// CHECK-INST: bfmops za1.h, p7/m, p7/m, z31.h, z31.h
// CHECK-ENCODING: [0xf9,0xff,0xbf,0x81]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: 81bffff9 <unknown>
bfmops za1.h, p3/m, p0/m, z17.h, z16.h // 10000001-10110000-00001110-00111001
// CHECK-INST: bfmops za1.h, p3/m, p0/m, z17.h, z16.h
// CHECK-ENCODING: [0x39,0x0e,0xb0,0x81]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: 81b00e39 <unknown>
bfmops za1.h, p1/m, p4/m, z1.h, z30.h // 10000001-10111110-10000100-00111001
// CHECK-INST: bfmops za1.h, p1/m, p4/m, z1.h, z30.h
// CHECK-ENCODING: [0x39,0x84,0xbe,0x81]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: 81be8439 <unknown>
bfmops za0.h, p5/m, p2/m, z19.h, z20.h // 10000001-10110100-01010110-01111000
// CHECK-INST: bfmops za0.h, p5/m, p2/m, z19.h, z20.h
// CHECK-ENCODING: [0x78,0x56,0xb4,0x81]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: 81b45678 <unknown>
bfmops za0.h, p6/m, p0/m, z12.h, z2.h // 10000001-10100010-00011001-10011000
// CHECK-INST: bfmops za0.h, p6/m, p0/m, z12.h, z2.h
// CHECK-ENCODING: [0x98,0x19,0xa2,0x81]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: 81a21998 <unknown>
bfmops za1.h, p2/m, p6/m, z1.h, z26.h // 10000001-10111010-11001000-00111001
// CHECK-INST: bfmops za1.h, p2/m, p6/m, z1.h, z26.h
// CHECK-ENCODING: [0x39,0xc8,0xba,0x81]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: 81bac839 <unknown>
bfmops za1.h, p2/m, p0/m, z22.h, z30.h // 10000001-10111110-00001010-11011001
// CHECK-INST: bfmops za1.h, p2/m, p0/m, z22.h, z30.h
// CHECK-ENCODING: [0xd9,0x0a,0xbe,0x81]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: 81be0ad9 <unknown>
bfmops za0.h, p5/m, p7/m, z9.h, z1.h // 10000001-10100001-11110101-00111000
// CHECK-INST: bfmops za0.h, p5/m, p7/m, z9.h, z1.h
// CHECK-ENCODING: [0x38,0xf5,0xa1,0x81]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: 81a1f538 <unknown>
bfmops za1.h, p2/m, p5/m, z12.h, z11.h // 10000001-10101011-10101001-10011001
// CHECK-INST: bfmops za1.h, p2/m, p5/m, z12.h, z11.h
// CHECK-ENCODING: [0x99,0xa9,0xab,0x81]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: 81aba999 <unknown>
diff --git a/llvm/test/MC/AArch64/SME2p1/bfsub-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfsub-diagnostics.s
index 9d3680e..5dade3e 100644
--- a/llvm/test/MC/AArch64/SME2p1/bfsub-diagnostics.s
+++ b/llvm/test/MC/AArch64/SME2/bfsub-diagnostics.s
@@ -1,4 +1,4 @@
-// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 2>&1 < %s | FileCheck %s
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 2>&1 < %s | FileCheck %s
// --------------------------------------------------------------------------//
// Out of range index offset
diff --git a/llvm/test/MC/AArch64/SME2p1/bfsub.s b/llvm/test/MC/AArch64/SME2/bfsub.s
index dac5f97..6cfd507 100644
--- a/llvm/test/MC/AArch64/SME2p1/bfsub.s
+++ b/llvm/test/MC/AArch64/SME2/bfsub.s
@@ -1,300 +1,300 @@
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \
-// RUN: | llvm-objdump -d --mattr=+sme2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \
-// RUN: | llvm-objdump -d --mattr=-sme2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN: | llvm-objdump -d --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
-// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p1,+b16b16 -disassemble -show-encoding \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
bfsub za.h[w8, 0, vgx2], {z0.h, z1.h} // 11000001-11100100-00011100-00001000
// CHECK-INST: bfsub za.h[w8, 0, vgx2], { z0.h, z1.h }
// CHECK-ENCODING: [0x08,0x1c,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e41c08 <unknown>
bfsub za.h[w8, 0], {z0.h - z1.h} // 11000001-11100100-00011100-00001000
// CHECK-INST: bfsub za.h[w8, 0, vgx2], { z0.h, z1.h }
// CHECK-ENCODING: [0x08,0x1c,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e41c08 <unknown>
bfsub za.h[w10, 5, vgx2], {z10.h, z11.h} // 11000001-11100100-01011101-01001101
// CHECK-INST: bfsub za.h[w10, 5, vgx2], { z10.h, z11.h }
// CHECK-ENCODING: [0x4d,0x5d,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e45d4d <unknown>
bfsub za.h[w10, 5], {z10.h - z11.h} // 11000001-11100100-01011101-01001101
// CHECK-INST: bfsub za.h[w10, 5, vgx2], { z10.h, z11.h }
// CHECK-ENCODING: [0x4d,0x5d,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e45d4d <unknown>
bfsub za.h[w11, 7, vgx2], {z12.h, z13.h} // 11000001-11100100-01111101-10001111
// CHECK-INST: bfsub za.h[w11, 7, vgx2], { z12.h, z13.h }
// CHECK-ENCODING: [0x8f,0x7d,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e47d8f <unknown>
bfsub za.h[w11, 7], {z12.h - z13.h} // 11000001-11100100-01111101-10001111
// CHECK-INST: bfsub za.h[w11, 7, vgx2], { z12.h, z13.h }
// CHECK-ENCODING: [0x8f,0x7d,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e47d8f <unknown>
bfsub za.h[w11, 7, vgx2], {z30.h, z31.h} // 11000001-11100100-01111111-11001111
// CHECK-INST: bfsub za.h[w11, 7, vgx2], { z30.h, z31.h }
// CHECK-ENCODING: [0xcf,0x7f,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e47fcf <unknown>
bfsub za.h[w11, 7], {z30.h - z31.h} // 11000001-11100100-01111111-11001111
// CHECK-INST: bfsub za.h[w11, 7, vgx2], { z30.h, z31.h }
// CHECK-ENCODING: [0xcf,0x7f,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e47fcf <unknown>
bfsub za.h[w8, 5, vgx2], {z16.h, z17.h} // 11000001-11100100-00011110-00001101
// CHECK-INST: bfsub za.h[w8, 5, vgx2], { z16.h, z17.h }
// CHECK-ENCODING: [0x0d,0x1e,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e41e0d <unknown>
bfsub za.h[w8, 5], {z16.h - z17.h} // 11000001-11100100-00011110-00001101
// CHECK-INST: bfsub za.h[w8, 5, vgx2], { z16.h, z17.h }
// CHECK-ENCODING: [0x0d,0x1e,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e41e0d <unknown>
bfsub za.h[w8, 1, vgx2], {z0.h, z1.h} // 11000001-11100100-00011100-00001001
// CHECK-INST: bfsub za.h[w8, 1, vgx2], { z0.h, z1.h }
// CHECK-ENCODING: [0x09,0x1c,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e41c09 <unknown>
bfsub za.h[w8, 1], {z0.h - z1.h} // 11000001-11100100-00011100-00001001
// CHECK-INST: bfsub za.h[w8, 1, vgx2], { z0.h, z1.h }
// CHECK-ENCODING: [0x09,0x1c,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e41c09 <unknown>
bfsub za.h[w10, 0, vgx2], {z18.h, z19.h} // 11000001-11100100-01011110, 01001000
// CHECK-INST: bfsub za.h[w10, 0, vgx2], { z18.h, z19.h }
// CHECK-ENCODING: [0x48,0x5e,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e45e48 <unknown>
bfsub za.h[w10, 0], {z18.h - z19.h} // 11000001-11100100-01011110-01001000
// CHECK-INST: bfsub za.h[w10, 0, vgx2], { z18.h, z19.h }
// CHECK-ENCODING: [0x48,0x5e,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e45e48 <unknown>
bfsub za.h[w8, 0, vgx2], {z12.h, z13.h} // 11000001-11100100-00011101-10001000
// CHECK-INST: bfsub za.h[w8, 0, vgx2], { z12.h, z13.h }
// CHECK-ENCODING: [0x88,0x1d,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e41d88 <unknown>
bfsub za.h[w8, 0], {z12.h - z13.h} // 11000001-11100100-00011101-10001000
// CHECK-INST: bfsub za.h[w8, 0, vgx2], { z12.h, z13.h }
// CHECK-ENCODING: [0x88,0x1d,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e41d88 <unknown>
bfsub za.h[w10, 1, vgx2], {z0.h, z1.h} // 11000001-11100100-01011100-00001001
// CHECK-INST: bfsub za.h[w10, 1, vgx2], { z0.h, z1.h }
// CHECK-ENCODING: [0x09,0x5c,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e45c09 <unknown>
bfsub za.h[w10, 1], {z0.h - z1.h} // 11000001-11100100-01011100-00001001
// CHECK-INST: bfsub za.h[w10, 1, vgx2], { z0.h, z1.h }
// CHECK-ENCODING: [0x09,0x5c,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e45c09 <unknown>
bfsub za.h[w8, 5, vgx2], {z22.h, z23.h} // 11000001-11100100-00011110, 11001101
// CHECK-INST: bfsub za.h[w8, 5, vgx2], { z22.h, z23.h }
// CHECK-ENCODING: [0xcd,0x1e,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e41ecd <unknown>
bfsub za.h[w8, 5], {z22.h - z23.h} // 11000001-11100100-00011110-11001101
// CHECK-INST: bfsub za.h[w8, 5, vgx2], { z22.h, z23.h }
// CHECK-ENCODING: [0xcd,0x1e,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e41ecd <unknown>
bfsub za.h[w11, 2, vgx2], {z8.h, z9.h} // 11000001-11100100-01111101-00001010
// CHECK-INST: bfsub za.h[w11, 2, vgx2], { z8.h, z9.h }
// CHECK-ENCODING: [0x0a,0x7d,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e47d0a <unknown>
bfsub za.h[w11, 2], {z8.h - z9.h} // 11000001-11100100-01111101-00001010
// CHECK-INST: bfsub za.h[w11, 2, vgx2], { z8.h, z9.h }
// CHECK-ENCODING: [0x0a,0x7d,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e47d0a <unknown>
bfsub za.h[w9, 7, vgx2], {z12.h, z13.h} // 11000001-11100100-00111101-10001111
// CHECK-INST: bfsub za.h[w9, 7, vgx2], { z12.h, z13.h }
// CHECK-ENCODING: [0x8f,0x3d,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e43d8f <unknown>
bfsub za.h[w9, 7], {z12.h - z13.h} // 11000001-11100100-00111101-10001111
// CHECK-INST: bfsub za.h[w9, 7, vgx2], { z12.h, z13.h }
// CHECK-ENCODING: [0x8f,0x3d,0xe4,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e43d8f <unknown>
bfsub za.h[w8, 0, vgx4], {z0.h - z3.h} // 11000001-11100101-00011100-00001000
// CHECK-INST: bfsub za.h[w8, 0, vgx4], { z0.h - z3.h }
// CHECK-ENCODING: [0x08,0x1c,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e51c08 <unknown>
bfsub za.h[w8, 0], {z0.h - z3.h} // 11000001-11100101-00011100-00001000
// CHECK-INST: bfsub za.h[w8, 0, vgx4], { z0.h - z3.h }
// CHECK-ENCODING: [0x08,0x1c,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e51c08 <unknown>
bfsub za.h[w10, 5, vgx4], {z8.h - z11.h} // 11000001-11100101-01011101-00001101
// CHECK-INST: bfsub za.h[w10, 5, vgx4], { z8.h - z11.h }
// CHECK-ENCODING: [0x0d,0x5d,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e55d0d <unknown>
bfsub za.h[w10, 5], {z8.h - z11.h} // 11000001-11100101-01011101-00001101
// CHECK-INST: bfsub za.h[w10, 5, vgx4], { z8.h - z11.h }
// CHECK-ENCODING: [0x0d,0x5d,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e55d0d <unknown>
bfsub za.h[w11, 7, vgx4], {z12.h - z15.h} // 11000001-11100101-01111101-10001111
// CHECK-INST: bfsub za.h[w11, 7, vgx4], { z12.h - z15.h }
// CHECK-ENCODING: [0x8f,0x7d,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e57d8f <unknown>
bfsub za.h[w11, 7], {z12.h - z15.h} // 11000001-11100101-01111101-10001111
// CHECK-INST: bfsub za.h[w11, 7, vgx4], { z12.h - z15.h }
// CHECK-ENCODING: [0x8f,0x7d,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e57d8f <unknown>
bfsub za.h[w11, 7, vgx4], {z28.h - z31.h} // 11000001-11100101-01111111-10001111
// CHECK-INST: bfsub za.h[w11, 7, vgx4], { z28.h - z31.h }
// CHECK-ENCODING: [0x8f,0x7f,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e57f8f <unknown>
bfsub za.h[w11, 7], {z28.h - z31.h} // 11000001-11100101-01111111-10001111
// CHECK-INST: bfsub za.h[w11, 7, vgx4], { z28.h - z31.h }
// CHECK-ENCODING: [0x8f,0x7f,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e57f8f <unknown>
bfsub za.h[w8, 5, vgx4], {z16.h - z19.h} // 11000001-11100101-00011110-00001101
// CHECK-INST: bfsub za.h[w8, 5, vgx4], { z16.h - z19.h }
// CHECK-ENCODING: [0x0d,0x1e,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e51e0d <unknown>
bfsub za.h[w8, 5], {z16.h - z19.h} // 11000001-11100101-00011110-00001101
// CHECK-INST: bfsub za.h[w8, 5, vgx4], { z16.h - z19.h }
// CHECK-ENCODING: [0x0d,0x1e,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e51e0d <unknown>
bfsub za.h[w8, 1, vgx4], {z0.h - z3.h} // 11000001-11100101-00011100-00001001
// CHECK-INST: bfsub za.h[w8, 1, vgx4], { z0.h - z3.h }
// CHECK-ENCODING: [0x09,0x1c,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e51c09 <unknown>
bfsub za.h[w8, 1], {z0.h - z3.h} // 11000001-11100101-00011100-00001001
// CHECK-INST: bfsub za.h[w8, 1, vgx4], { z0.h - z3.h }
// CHECK-ENCODING: [0x09,0x1c,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e51c09 <unknown>
bfsub za.h[w10, 0, vgx4], {z16.h - z19.h} // 11000001-11100101-01011110-00001000
// CHECK-INST: bfsub za.h[w10, 0, vgx4], { z16.h - z19.h }
// CHECK-ENCODING: [0x08,0x5e,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e55e08 <unknown>
bfsub za.h[w10, 0], {z16.h - z19.h} // 11000001-11100101-01011110-00001000
// CHECK-INST: bfsub za.h[w10, 0, vgx4], { z16.h - z19.h }
// CHECK-ENCODING: [0x08,0x5e,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e55e08 <unknown>
bfsub za.h[w8, 0, vgx4], {z12.h - z15.h} // 11000001-11100101-00011101-10001000
// CHECK-INST: bfsub za.h[w8, 0, vgx4], { z12.h - z15.h }
// CHECK-ENCODING: [0x88,0x1d,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e51d88 <unknown>
bfsub za.h[w8, 0], {z12.h - z15.h} // 11000001-11100101-00011101-10001000
// CHECK-INST: bfsub za.h[w8, 0, vgx4], { z12.h - z15.h }
// CHECK-ENCODING: [0x88,0x1d,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e51d88 <unknown>
bfsub za.h[w10, 1, vgx4], {z0.h - z3.h} // 11000001-11100101-01011100-00001001
// CHECK-INST: bfsub za.h[w10, 1, vgx4], { z0.h - z3.h }
// CHECK-ENCODING: [0x09,0x5c,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e55c09 <unknown>
bfsub za.h[w10, 1], {z0.h - z3.h} // 11000001-11100101-01011100-00001001
// CHECK-INST: bfsub za.h[w10, 1, vgx4], { z0.h - z3.h }
// CHECK-ENCODING: [0x09,0x5c,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e55c09 <unknown>
bfsub za.h[w8, 5, vgx4], {z20.h - z23.h} // 11000001-11100101-00011110-10001101
// CHECK-INST: bfsub za.h[w8, 5, vgx4], { z20.h - z23.h }
// CHECK-ENCODING: [0x8d,0x1e,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e51e8d <unknown>
bfsub za.h[w8, 5], {z20.h - z23.h} // 11000001-11100101-00011110-10001101
// CHECK-INST: bfsub za.h[w8, 5, vgx4], { z20.h - z23.h }
// CHECK-ENCODING: [0x8d,0x1e,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e51e8d <unknown>
bfsub za.h[w11, 2, vgx4], {z8.h - z11.h} // 11000001-11100101-01111101-00001010
// CHECK-INST: bfsub za.h[w11, 2, vgx4], { z8.h - z11.h }
// CHECK-ENCODING: [0x0a,0x7d,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e57d0a <unknown>
bfsub za.h[w11, 2], {z8.h - z11.h} // 11000001-11100101-01111101-00001010
// CHECK-INST: bfsub za.h[w11, 2, vgx4], { z8.h - z11.h }
// CHECK-ENCODING: [0x0a,0x7d,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e57d0a <unknown>
bfsub za.h[w9, 7, vgx4], {z12.h - z15.h} // 11000001-11100101-00111101-10001111
// CHECK-INST: bfsub za.h[w9, 7, vgx4], { z12.h - z15.h }
// CHECK-ENCODING: [0x8f,0x3d,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e53d8f <unknown>
bfsub za.h[w9, 7], {z12.h - z15.h} // 11000001-11100101-00111101-10001111
// CHECK-INST: bfsub za.h[w9, 7, vgx4], { z12.h - z15.h }
// CHECK-ENCODING: [0x8f,0x3d,0xe5,0xc1]
-// CHECK-ERROR: instruction requires: b16b16 sme2p1
+// CHECK-ERROR: instruction requires: b16b16 sme2
// CHECK-UNKNOWN: c1e53d8f <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2p1/pmov.s b/llvm/test/MC/AArch64/SVE2p1/pmov.s
index ca2b55b..8c08cf7 100644
--- a/llvm/test/MC/AArch64/SVE2p1/pmov.s
+++ b/llvm/test/MC/AArch64/SVE2p1/pmov.s
@@ -20,6 +20,12 @@ pmov p0.h, z0[0] // 00000101-00101100-00111000-00000000
// CHECK-ERROR: instruction requires: sme2p1 or sve2p1
// CHECK-UNKNOWN: 052c3800 <unknown>
+pmov p0.h, z0 // 00000101-00101100-00111000-00000000
+// CHECK-INST: pmov p0.h, z0[0]
+// CHECK-ENCODING: [0x00,0x38,0x2c,0x05]
+// CHECK-ERROR: instruction requires: sme2p1 or sve2p1
+// CHECK-UNKNOWN: 052c3800 <unknown>
+
pmov p5.h, z10[0] // 00000101-00101100-00111001-01000101
// CHECK-INST: pmov p5.h, z10[0]
// CHECK-ENCODING: [0x45,0x39,0x2c,0x05]
@@ -44,6 +50,12 @@ pmov p0.s, z0[0] // 00000101-01101000-00111000-00000000
// CHECK-ERROR: instruction requires: sme2p1 or sve2p1
// CHECK-UNKNOWN: 05683800 <unknown>
+pmov p0.s, z0 // 00000101-01101000-00111000-00000000
+// CHECK-INST: pmov p0.s, z0[0]
+// CHECK-ENCODING: [0x00,0x38,0x68,0x05]
+// CHECK-ERROR: instruction requires: sme2p1 or sve2p1
+// CHECK-UNKNOWN: 05683800 <unknown>
+
pmov p5.s, z10[2] // 00000101-01101100-00111001-01000101
// CHECK-INST: pmov p5.s, z10[2]
// CHECK-ENCODING: [0x45,0x39,0x6c,0x05]
@@ -68,6 +80,12 @@ pmov p0.d, z0[0] // 00000101-10101000-00111000-00000000
// CHECK-ERROR: instruction requires: sme2p1 or sve2p1
// CHECK-UNKNOWN: 05a83800 <unknown>
+pmov p0.d, z0 // 00000101-10101000-00111000-00000000
+// CHECK-INST: pmov p0.d, z0[0]
+// CHECK-ENCODING: [0x00,0x38,0xa8,0x05]
+// CHECK-ERROR: instruction requires: sme2p1 or sve2p1
+// CHECK-UNKNOWN: 05a83800 <unknown>
+
pmov p5.d, z10[6] // 00000101-11101100-00111001-01000101
// CHECK-INST: pmov p5.d, z10[6]
// CHECK-ENCODING: [0x45,0x39,0xec,0x05]
@@ -122,6 +140,12 @@ pmov z0[0], p0.h // 00000101-00101101-00111000-00000000
// CHECK-ERROR: instruction requires: sme2p1 or sve2p1
// CHECK-UNKNOWN: 052d3800 <unknown>
+pmov z0, p0.h // 00000101-00101101-00111000-00000000
+// CHECK-INST: pmov z0[0], p0.h
+// CHECK-ENCODING: [0x00,0x38,0x2d,0x05]
+// CHECK-ERROR: instruction requires: sme2p1 or sve2p1
+// CHECK-UNKNOWN: 052d3800 <unknown>
+
pmov z21[0], p10.h // 00000101-00101101-00111001-01010101
// CHECK-INST: pmov z21[0], p10.h
// CHECK-ENCODING: [0x55,0x39,0x2d,0x05]
@@ -147,6 +171,12 @@ pmov z0[0], p0.s // 00000101-01101001-00111000-00000000
// CHECK-ERROR: instruction requires: sme2p1 or sve2p1
// CHECK-UNKNOWN: 05693800 <unknown>
+pmov z0, p0.s // 00000101-01101001-00111000-00000000
+// CHECK-INST: pmov z0[0], p0.s
+// CHECK-ENCODING: [0x00,0x38,0x69,0x05]
+// CHECK-ERROR: instruction requires: sme2p1 or sve2p1
+// CHECK-UNKNOWN: 05693800 <unknown>
+
pmov z21[2], p10.s // 00000101-01101101-00111001-01010101
// CHECK-INST: pmov z21[2], p10.s
// CHECK-ENCODING: [0x55,0x39,0x6d,0x05]
@@ -171,6 +201,12 @@ pmov z0[0], p0.d // 00000101-10101001-00111000-00000000
// CHECK-ERROR: instruction requires: sme2p1 or sve2p1
// CHECK-UNKNOWN: 05a93800 <unknown>
+pmov z0, p0.d // 00000101-10101001-00111000-00000000
+// CHECK-INST: pmov z0[0], p0.d
+// CHECK-ENCODING: [0x00,0x38,0xa9,0x05]
+// CHECK-ERROR: instruction requires: sme2p1 or sve2p1
+// CHECK-UNKNOWN: 05a93800 <unknown>
+
pmov z21[6], p10.d // 00000101-11101101-00111001-01010101
// CHECK-INST: pmov z21[6], p10.d
// CHECK-ENCODING: [0x55,0x39,0xed,0x05]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s b/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
new file mode 100644
index 0000000..dbd732f
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
@@ -0,0 +1,199 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck --strict-whitespace -check-prefix=GFX12 %s
+
+ds_direct_load v1 wait_va_vdst:15
+// GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x1f,0xce]
+
+ds_direct_load v2 wait_va_vdst:14
+// GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:0 ; encoding: [0x02,0x00,0x1e,0xce]
+
+ds_direct_load v3 wait_va_vdst:13
+// GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:0 ; encoding: [0x03,0x00,0x1d,0xce]
+
+ds_direct_load v4 wait_va_vdst:12
+// GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:0 ; encoding: [0x04,0x00,0x1c,0xce]
+
+ds_direct_load v5 wait_va_vdst:11
+// GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:0 ; encoding: [0x05,0x00,0x1b,0xce]
+
+ds_direct_load v6 wait_va_vdst:10
+// GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:0 ; encoding: [0x06,0x00,0x1a,0xce]
+
+ds_direct_load v7 wait_va_vdst:9
+// GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:0 ; encoding: [0x07,0x00,0x19,0xce]
+
+ds_direct_load v8 wait_va_vdst:8
+// GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:0 ; encoding: [0x08,0x00,0x18,0xce]
+
+ds_direct_load v9 wait_va_vdst:7
+// GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:0 ; encoding: [0x09,0x00,0x17,0xce]
+
+ds_direct_load v10 wait_va_vdst:6
+// GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:0 ; encoding: [0x0a,0x00,0x16,0xce]
+
+ds_direct_load v11 wait_va_vdst:5
+// GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:0 ; encoding: [0x0b,0x00,0x15,0xce]
+
+ds_direct_load v12 wait_va_vdst:4
+// GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:0 ; encoding: [0x0c,0x00,0x14,0xce]
+
+ds_direct_load v13 wait_va_vdst:3
+// GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:0 ; encoding: [0x0d,0x00,0x13,0xce]
+
+ds_direct_load v14 wait_va_vdst:2
+// GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:0 ; encoding: [0x0e,0x00,0x12,0xce]
+
+ds_direct_load v15 wait_va_vdst:1
+// GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:0 ; encoding: [0x0f,0x00,0x11,0xce]
+
+ds_direct_load v16 wait_va_vdst:0
+// GFX12: ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x10,0x00,0x10,0xce]
+
+ds_direct_load v17
+// GFX12: ds_direct_load v17 wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x11,0x00,0x10,0xce]
+
+ds_param_load v1, attr0.x wait_va_vdst:15
+// GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x0f,0xce]
+
+ds_param_load v2, attr0.y wait_va_vdst:14
+// GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:0 ; encoding: [0x02,0x01,0x0e,0xce]
+
+ds_param_load v3, attr0.z wait_va_vdst:13
+// GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:0 ; encoding: [0x03,0x02,0x0d,0xce]
+
+ds_param_load v4, attr0.w wait_va_vdst:12
+// GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:0 ; encoding: [0x04,0x03,0x0c,0xce]
+
+ds_param_load v5, attr0.x wait_va_vdst:11
+// GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:0 ; encoding: [0x05,0x00,0x0b,0xce]
+
+ds_param_load v6, attr1.x wait_va_vdst:10
+// GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:0 ; encoding: [0x06,0x04,0x0a,0xce]
+
+ds_param_load v7, attr2.y wait_va_vdst:9
+// GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:0 ; encoding: [0x07,0x09,0x09,0xce]
+
+ds_param_load v8, attr3.z wait_va_vdst:8
+// GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:0 ; encoding: [0x08,0x0e,0x08,0xce]
+
+ds_param_load v9, attr4.w wait_va_vdst:7
+// GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:0 ; encoding: [0x09,0x13,0x07,0xce]
+
+ds_param_load v10, attr11.x wait_va_vdst:6
+// GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:0 ; encoding: [0x0a,0x2c,0x06,0xce]
+
+ds_param_load v11, attr22.y wait_va_vdst:5
+// GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:0 ; encoding: [0x0b,0x59,0x05,0xce]
+
+ds_param_load v13, attr32.x wait_va_vdst:3
+// GFX12: ds_param_load v13, attr32.x wait_va_vdst:3 wait_vm_vsrc:0 ; encoding: [0x0d,0x80,0x03,0xce]
+
+ds_param_load v14, attr32.y wait_va_vdst:2
+// GFX12: ds_param_load v14, attr32.y wait_va_vdst:2 wait_vm_vsrc:0 ; encoding: [0x0e,0x81,0x02,0xce]
+
+ds_param_load v15, attr32.z wait_va_vdst:1
+// GFX12: ds_param_load v15, attr32.z wait_va_vdst:1 wait_vm_vsrc:0 ; encoding: [0x0f,0x82,0x01,0xce]
+
+ds_param_load v16, attr32.w wait_va_vdst:0
+// GFX12: ds_param_load v16, attr32.w wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x10,0x83,0x00,0xce]
+
+ds_param_load v17, attr32.w
+// GFX12: ds_param_load v17, attr32.w wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x11,0x83,0x00,0xce]
+
+ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1
+// GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x9f,0xce]
+
+ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:1
+// GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x00,0x9e,0xce]
+
+ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:1
+// GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x00,0x9d,0xce]
+
+ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:1
+// GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x00,0x9c,0xce]
+
+ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:1
+// GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x9b,0xce]
+
+ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:1
+// GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x00,0x9a,0xce]
+
+ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:1
+// GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x00,0x99,0xce]
+
+ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:1
+// GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x00,0x98,0xce]
+
+ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:1
+// GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x00,0x97,0xce]
+
+ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:1
+// GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x00,0x96,0xce]
+
+ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:1
+// GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:1 ; encoding: [0x0b,0x00,0x95,0xce]
+
+ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:1
+// GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:1 ; encoding: [0x0c,0x00,0x94,0xce]
+
+ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:1
+// GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:1 ; encoding: [0x0d,0x00,0x93,0xce]
+
+ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:1
+// GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:1 ; encoding: [0x0e,0x00,0x92,0xce]
+
+ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1
+// GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0x00,0x91,0xce]
+
+ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:1
+// GFX12: ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x10,0x00,0x90,0xce]
+
+ds_direct_load v17 wait_vm_vsrc:1
+// GFX12: ds_direct_load v17 wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x11,0x00,0x90,0xce]
+
+ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1
+// GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x8f,0xce]
+
+ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:1
+// GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x01,0x8e,0xce]
+
+ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:1
+// GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x02,0x8d,0xce]
+
+ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:1
+// GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x03,0x8c,0xce]
+
+ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:1
+// GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x8b,0xce]
+
+ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:1
+// GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x04,0x8a,0xce]
+
+ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:1
+// GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x09,0x89,0xce]
+
+ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:1
+// GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x0e,0x88,0xce]
+
+ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:1
+// GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x13,0x87,0xce]
+
+ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:1
+// GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x2c,0x86,0xce]
+
+ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:1
+// GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:1 ; encoding: [0x0b,0x59,0x85,0xce]
+
+ds_param_load v13, attr32.x wait_va_vdst:3 wait_vm_vsrc:1
+// GFX12: ds_param_load v13, attr32.x wait_va_vdst:3 wait_vm_vsrc:1 ; encoding: [0x0d,0x80,0x83,0xce]
+
+ds_param_load v14, attr32.y wait_va_vdst:2 wait_vm_vsrc:1
+// GFX12: ds_param_load v14, attr32.y wait_va_vdst:2 wait_vm_vsrc:1 ; encoding: [0x0e,0x81,0x82,0xce]
+
+ds_param_load v15, attr32.z wait_va_vdst:1 wait_vm_vsrc:1
+// GFX12: ds_param_load v15, attr32.z wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0x82,0x81,0xce]
+
+ds_param_load v16, attr32.w wait_va_vdst:0 wait_vm_vsrc:1
+// GFX12: ds_param_load v16, attr32.w wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x10,0x83,0x80,0xce]
+
+ds_param_load v17, attr32.w wait_vm_vsrc:1
+// GFX12: ds_param_load v17, attr32.w wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x11,0x83,0x80,0xce]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt
new file mode 100644
index 0000000..b7c0394
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt
@@ -0,0 +1,206 @@
+# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck --strict-whitespace -check-prefix=GFX12 %s
+# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck --strict-whitespace -check-prefix=GFX12 %s
+
+# GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:0 ; encoding: [0x0a,0x00,0x16,0xce]
+0x0a,0x00,0x16,0xce
+
+# GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:0 ; encoding: [0x0b,0x00,0x15,0xce]
+0x0b,0x00,0x15,0xce
+
+# GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:0 ; encoding: [0x0c,0x00,0x14,0xce]
+0x0c,0x00,0x14,0xce
+
+# GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:0 ; encoding: [0x0d,0x00,0x13,0xce]
+0x0d,0x00,0x13,0xce
+
+# GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:0 ; encoding: [0x0e,0x00,0x12,0xce]
+0x0e,0x00,0x12,0xce
+
+# GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:0 ; encoding: [0x0f,0x00,0x11,0xce]
+0x0f,0x00,0x11,0xce
+
+# GFX12: ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x10,0x00,0x10,0xce]
+0x10,0x00,0x10,0xce
+
+# GFX12: ds_direct_load v17 wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x11,0x00,0x10,0xce]
+0x11,0x00,0x10,0xce
+
+# GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x1f,0xce]
+0x01,0x00,0x1f,0xce
+
+# GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:0 ; encoding: [0x02,0x00,0x1e,0xce]
+0x02,0x00,0x1e,0xce
+
+# GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:0 ; encoding: [0x03,0x00,0x1d,0xce]
+0x03,0x00,0x1d,0xce
+
+# GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:0 ; encoding: [0x04,0x00,0x1c,0xce]
+0x04,0x00,0x1c,0xce
+
+# GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:0 ; encoding: [0x05,0x00,0x1b,0xce]
+0x05,0x00,0x1b,0xce
+
+# GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:0 ; encoding: [0x06,0x00,0x1a,0xce]
+0x06,0x00,0x1a,0xce
+
+# GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:0 ; encoding: [0x07,0x00,0x19,0xce]
+0x07,0x00,0x19,0xce
+
+# GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:0 ; encoding: [0x08,0x00,0x18,0xce]
+0x08,0x00,0x18,0xce
+
+# GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:0 ; encoding: [0x09,0x00,0x17,0xce]
+0x09,0x00,0x17,0xce
+
+# GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:0 ; encoding: [0x0a,0x2c,0x06,0xce]
+0x0a,0x2c,0x06,0xce
+
+# GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:0 ; encoding: [0x0b,0x59,0x05,0xce]
+0x0b,0x59,0x05,0xce
+
+# GFX12: ds_param_load v12, attr33.z wait_va_vdst:4 wait_vm_vsrc:0 ; encoding: [0x0c,0x86,0x04,0xce]
+0x0c,0x86,0x04,0xce
+
+# GFX12: ds_param_load v13, attr63.x wait_va_vdst:3 wait_vm_vsrc:0 ; encoding: [0x0d,0xfc,0x03,0xce]
+0x0d,0xfc,0x03,0xce
+
+# GFX12: ds_param_load v14, attr63.y wait_va_vdst:2 wait_vm_vsrc:0 ; encoding: [0x0e,0xfd,0x02,0xce]
+0x0e,0xfd,0x02,0xce
+
+# GFX12: ds_param_load v15, attr63.z wait_va_vdst:1 wait_vm_vsrc:0 ; encoding: [0x0f,0xfe,0x01,0xce]
+0x0f,0xfe,0x01,0xce
+
+# GFX12: ds_param_load v16, attr63.w wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x10,0xff,0x00,0xce]
+0x10,0xff,0x00,0xce
+
+# GFX12: ds_param_load v17, attr63.w wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x11,0xff,0x00,0xce]
+0x11,0xff,0x00,0xce
+
+# GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x0f,0xce]
+0x01,0x00,0x0f,0xce
+
+# GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:0 ; encoding: [0x02,0x01,0x0e,0xce]
+0x02,0x01,0x0e,0xce
+
+# GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:0 ; encoding: [0x03,0x02,0x0d,0xce]
+0x03,0x02,0x0d,0xce
+
+# GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:0 ; encoding: [0x04,0x03,0x0c,0xce]
+0x04,0x03,0x0c,0xce
+
+# GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:0 ; encoding: [0x05,0x00,0x0b,0xce]
+0x05,0x00,0x0b,0xce
+
+# GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:0 ; encoding: [0x06,0x04,0x0a,0xce]
+0x06,0x04,0x0a,0xce
+
+# GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:0 ; encoding: [0x07,0x09,0x09,0xce]
+0x07,0x09,0x09,0xce
+
+# GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:0 ; encoding: [0x08,0x0e,0x08,0xce]
+0x08,0x0e,0x08,0xce
+
+# GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:0 ; encoding: [0x09,0x13,0x07,0xce]
+0x09,0x13,0x07,0xce
+
+# GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x00,0x96,0xce]
+0x0a,0x00,0x96,0xce
+
+# GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:1 ; encoding: [0x0b,0x00,0x95,0xce]
+0x0b,0x00,0x95,0xce
+
+# GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:1 ; encoding: [0x0c,0x00,0x94,0xce]
+0x0c,0x00,0x94,0xce
+
+# GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:1 ; encoding: [0x0d,0x00,0x93,0xce]
+0x0d,0x00,0x93,0xce
+
+# GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:1 ; encoding: [0x0e,0x00,0x92,0xce]
+0x0e,0x00,0x92,0xce
+
+# GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0x00,0x91,0xce]
+0x0f,0x00,0x91,0xce
+
+# GFX12: ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x10,0x00,0x90,0xce]
+0x10,0x00,0x90,0xce
+
+# GFX12: ds_direct_load v17 wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x11,0x00,0x90,0xce]
+0x11,0x00,0x90,0xce
+
+# GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x9f,0xce]
+0x01,0x00,0x9f,0xce
+
+# GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x00,0x9e,0xce]
+0x02,0x00,0x9e,0xce
+
+# GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x00,0x9d,0xce]
+0x03,0x00,0x9d,0xce
+
+# GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x00,0x9c,0xce]
+0x04,0x00,0x9c,0xce
+
+# GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x9b,0xce]
+0x05,0x00,0x9b,0xce
+
+# GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x00,0x9a,0xce]
+0x06,0x00,0x9a,0xce
+
+# GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x00,0x99,0xce]
+0x07,0x00,0x99,0xce
+
+# GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x00,0x98,0xce]
+0x08,0x00,0x98,0xce
+
+# GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x00,0x97,0xce]
+0x09,0x00,0x97,0xce
+
+# GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x2c,0x86,0xce]
+0x0a,0x2c,0x86,0xce
+
+# GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:1 ; encoding: [0x0b,0x59,0x85,0xce]
+0x0b,0x59,0x85,0xce
+
+# GFX12: ds_param_load v12, attr33.z wait_va_vdst:4 wait_vm_vsrc:1 ; encoding: [0x0c,0x86,0x84,0xce]
+0x0c,0x86,0x84,0xce
+
+# GFX12: ds_param_load v13, attr63.x wait_va_vdst:3 wait_vm_vsrc:1 ; encoding: [0x0d,0xfc,0x83,0xce]
+0x0d,0xfc,0x83,0xce
+
+# GFX12: ds_param_load v14, attr63.y wait_va_vdst:2 wait_vm_vsrc:1 ; encoding: [0x0e,0xfd,0x82,0xce]
+0x0e,0xfd,0x82,0xce
+
+# GFX12: ds_param_load v15, attr63.z wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0xfe,0x81,0xce]
+0x0f,0xfe,0x81,0xce
+
+# GFX12: ds_param_load v16, attr63.w wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x10,0xff,0x80,0xce]
+0x10,0xff,0x80,0xce
+
+# GFX12: ds_param_load v17, attr63.w wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x11,0xff,0x80,0xce]
+0x11,0xff,0x80,0xce
+
+# GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x8f,0xce]
+0x01,0x00,0x8f,0xce
+
+# GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x01,0x8e,0xce]
+0x02,0x01,0x8e,0xce
+
+# GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x02,0x8d,0xce]
+0x03,0x02,0x8d,0xce
+
+# GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x03,0x8c,0xce]
+0x04,0x03,0x8c,0xce
+
+# GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x8b,0xce]
+0x05,0x00,0x8b,0xce
+
+# GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x04,0x8a,0xce]
+0x06,0x04,0x8a,0xce
+
+# GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x09,0x89,0xce]
+0x07,0x09,0x89,0xce
+
+# GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x0e,0x88,0xce]
+0x08,0x0e,0x88,0xce
+
+# GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x13,0x87,0xce]
+0x09,0x13,0x87,0xce
diff --git a/llvm/test/MC/LoongArch/Misc/cfi-advance.s b/llvm/test/MC/LoongArch/Misc/cfi-advance.s
new file mode 100644
index 0000000..662c43e
--- /dev/null
+++ b/llvm/test/MC/LoongArch/Misc/cfi-advance.s
@@ -0,0 +1,27 @@
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 -mattr=-relax %s -o %t.o
+# RUN: llvm-readobj -r %t.o | FileCheck --check-prefix=RELOC %s
+# RUN: llvm-dwarfdump --debug-frame %t.o | FileCheck --check-prefix=DWARFDUMP %s
+
+# RELOC: Relocations [
+# RELOC-NEXT: .rela.eh_frame {
+# RELOC-NEXT: 0x1C R_LARCH_32_PCREL .text 0x0
+# RELOC-NEXT: }
+# RELOC-NEXT: ]
+# DWARFDUMP: DW_CFA_advance_loc: 4
+# DWARFDUMP-NEXT: DW_CFA_def_cfa_offset: +8
+# DWARFDUMP-NEXT: DW_CFA_advance_loc: 8
+# DWARFDUMP-NEXT: DW_CFA_def_cfa_offset: +8
+
+ .text
+ .globl test
+ .p2align 2
+ .type test,@function
+test:
+ .cfi_startproc
+ nop
+ .cfi_def_cfa_offset 8
+ .p2align 3
+ nop
+ .cfi_def_cfa_offset 8
+ nop
+ .cfi_endproc
diff --git a/llvm/test/MC/LoongArch/Relocations/relax-addsub.s b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s
index c4454f5..1492265 100644
--- a/llvm/test/MC/LoongArch/Relocations/relax-addsub.s
+++ b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s
@@ -23,14 +23,6 @@
# RELAX-NEXT: 0x14 R_LARCH_RELAX - 0x0
# RELAX-NEXT: }
# RELAX-NEXT: Section ({{.*}}) .rela.data {
-# RELAX-NEXT: 0xF R_LARCH_ADD8 .L3 0x0
-# RELAX-NEXT: 0xF R_LARCH_SUB8 .L2 0x0
-# RELAX-NEXT: 0x10 R_LARCH_ADD16 .L3 0x0
-# RELAX-NEXT: 0x10 R_LARCH_SUB16 .L2 0x0
-# RELAX-NEXT: 0x12 R_LARCH_ADD32 .L3 0x0
-# RELAX-NEXT: 0x12 R_LARCH_SUB32 .L2 0x0
-# RELAX-NEXT: 0x16 R_LARCH_ADD64 .L3 0x0
-# RELAX-NEXT: 0x16 R_LARCH_SUB64 .L2 0x0
# RELAX-NEXT: 0x1E R_LARCH_ADD8 .L4 0x0
# RELAX-NEXT: 0x1E R_LARCH_SUB8 .L3 0x0
# RELAX-NEXT: 0x1F R_LARCH_ADD16 .L4 0x0
@@ -43,8 +35,8 @@
# RELAX-NEXT: ]
# RELAX: Hex dump of section '.data':
-# RELAX-NEXT: 0x00000000 04040004 00000004 00000000 00000000
-# RELAX-NEXT: 0x00000010 00000000 00000000 00000000 00000000
+# RELAX-NEXT: 0x00000000 04040004 00000004 00000000 0000000c
+# RELAX-NEXT: 0x00000010 0c000c00 00000c00 00000000 00000000
# RELAX-NEXT: 0x00000020 00000000 00000000 00000000 00
.text
@@ -63,13 +55,12 @@
.short .L2 - .L1
.word .L2 - .L1
.dword .L2 - .L1
-## With relaxation, emit relocs because of the .align making the diff variable.
-## TODO Handle alignment directive. Why they emit relocs now? They returns
-## without folding symbols offset in AttemptToFoldSymbolOffsetDifference().
+## TODO Handle alignment directive.
.byte .L3 - .L2
.short .L3 - .L2
.word .L3 - .L2
.dword .L3 - .L2
+## With relaxation, emit relocs because the la.pcrel makes the diff variable.
.byte .L4 - .L3
.short .L4 - .L3
.word .L4 - .L3
diff --git a/llvm/test/MC/RISCV/cfi-advance.s b/llvm/test/MC/RISCV/cfi-advance.s
index d9224fd..c4af390 100644
--- a/llvm/test/MC/RISCV/cfi-advance.s
+++ b/llvm/test/MC/RISCV/cfi-advance.s
@@ -5,6 +5,8 @@
# CHECK: .rela.eh_frame {
# CHECK-NEXT: 0x1C R_RISCV_32_PCREL <null> 0x0
+# CHECK-NEXT: 0x35 R_RISCV_SET6 <null> 0x0
+# CHECK-NEXT: 0x35 R_RISCV_SUB6 <null> 0x0
# CHECK-NEXT: }
# CHECK-DWARFDUMP: DW_CFA_advance_loc1: 104
# CHECK-DWARFDUMP-NEXT: DW_CFA_def_cfa_offset: +8
@@ -12,6 +14,8 @@
# CHECK-DWARFDUMP-NEXT: DW_CFA_def_cfa_offset: +8
# CHECK-DWARFDUMP-NEXT: DW_CFA_advance_loc4: 65539
# CHECK-DWARFDUMP-NEXT: DW_CFA_def_cfa_offset: +8
+# CHECK-DWARFDUMP-NEXT: DW_CFA_advance_loc: 10
+# CHECK-DWARFDUMP-NEXT: DW_CFA_def_cfa_offset: +8
.text
.globl test # -- Begin function test
.p2align 1
@@ -28,4 +32,7 @@ test:
.zero 65535, 0x90
.cfi_def_cfa_offset 8
nop
+ .p2align 3
+ .cfi_def_cfa_offset 8
+ nop
.cfi_endproc
diff --git a/llvm/test/MC/RISCV/fixups-expr.s b/llvm/test/MC/RISCV/fixups-expr.s
index 8a02d29..63e7f2e 100644
--- a/llvm/test/MC/RISCV/fixups-expr.s
+++ b/llvm/test/MC/RISCV/fixups-expr.s
@@ -16,11 +16,15 @@
.globl G1
.globl G2
+.globl G3
.L1:
G1:
call extern
.L2:
G2:
+ .p2align 3
+.L3:
+G3:
.data
.dword .L2-.L1
@@ -31,6 +35,14 @@ G2:
.half G2-G1
.byte .L2-.L1
.byte G2-G1
+.dword .L3-.L2
+.dword G3-G2
+.word .L3-.L2
+.word G3-G2
+.half .L3-.L2
+.half G3-G2
+.byte .L3-.L2
+.byte G3-G2
# RELAX: .rela.data {
# RELAX-NEXT: 0x0 R_RISCV_ADD64 .L2 0x0
# RELAX-NEXT: 0x0 R_RISCV_SUB64 .L1 0x0
@@ -48,4 +60,20 @@ G2:
# RELAX-NEXT: 0x1C R_RISCV_SUB8 .L1 0x0
# RELAX-NEXT: 0x1D R_RISCV_ADD8 G2 0x0
# RELAX-NEXT: 0x1D R_RISCV_SUB8 G1 0x0
+# RELAX-NEXT: 0x1E R_RISCV_ADD64 .L3 0x0
+# RELAX-NEXT: 0x1E R_RISCV_SUB64 .L2 0x0
+# RELAX-NEXT: 0x26 R_RISCV_ADD64 G3 0x0
+# RELAX-NEXT: 0x26 R_RISCV_SUB64 G2 0x0
+# RELAX-NEXT: 0x2E R_RISCV_ADD32 .L3 0x0
+# RELAX-NEXT: 0x2E R_RISCV_SUB32 .L2 0x0
+# RELAX-NEXT: 0x32 R_RISCV_ADD32 G3 0x0
+# RELAX-NEXT: 0x32 R_RISCV_SUB32 G2 0x0
+# RELAX-NEXT: 0x36 R_RISCV_ADD16 .L3 0x0
+# RELAX-NEXT: 0x36 R_RISCV_SUB16 .L2 0x0
+# RELAX-NEXT: 0x38 R_RISCV_ADD16 G3 0x0
+# RELAX-NEXT: 0x38 R_RISCV_SUB16 G2 0x0
+# RELAX-NEXT: 0x3A R_RISCV_ADD8 .L3 0x0
+# RELAX-NEXT: 0x3A R_RISCV_SUB8 .L2 0x0
+# RELAX-NEXT: 0x3B R_RISCV_ADD8 G3 0x0
+# RELAX-NEXT: 0x3B R_RISCV_SUB8 G2 0x0
# RELAX-NEXT: }
diff --git a/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll b/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll
index 0d90bc2..5ae559d 100644
--- a/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll
+++ b/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll
@@ -316,12 +316,13 @@ define i1 @sub_nuw_neg_i16(i16 %a) {
; CHECK-LABEL: @sub_nuw_neg_i16(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[NEG2:%.*]] = sub nuw i16 [[A:%.*]], -305
-; CHECK-NEXT: br i1 false, label [[EXIT_1:%.*]], label [[EXIT_2:%.*]]
+; CHECK-NEXT: [[C_1:%.*]] = icmp ugt i16 0, [[NEG2]]
+; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_1:%.*]], label [[EXIT_2:%.*]]
; CHECK: exit.1:
-; CHECK-NEXT: ret i1 true
+; CHECK-NEXT: [[C_2:%.*]] = icmp ugt i16 [[A]], 0
+; CHECK-NEXT: ret i1 [[C_2]]
; CHECK: exit.2:
-; CHECK-NEXT: [[C_3:%.*]] = icmp ugt i16 [[A]], 0
-; CHECK-NEXT: ret i1 [[C_3]]
+; CHECK-NEXT: ret i1 true
;
entry:
%neg2 = sub nuw i16 %a, -305
@@ -379,3 +380,117 @@ entry:
%c = icmp ugt i64 %neg2, 0
ret i1 %c
}
+
+define i1 @pr76713(i16 %i1, i16 %i3) {
+; CHECK-LABEL: @pr76713(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[C1:%.*]] = icmp ult i16 [[I1:%.*]], -1
+; CHECK-NEXT: [[C2:%.*]] = icmp uge i16 [[I1]], -3
+; CHECK-NEXT: [[C3:%.*]] = icmp ult i16 [[I3:%.*]], 2
+; CHECK-NEXT: [[AND:%.*]] = and i1 [[C1]], [[C2]]
+; CHECK-NEXT: [[AND_2:%.*]] = and i1 [[AND]], [[C3]]
+; CHECK-NEXT: br i1 [[AND]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: then:
+; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i16 [[I1]], -3
+; CHECK-NEXT: [[ARRAYIDX_IDX:%.*]] = mul nuw nsw i16 [[I3]], 4
+; CHECK-NEXT: [[I6:%.*]] = add nuw nsw i16 [[ARRAYIDX_IDX]], [[SUB]]
+; CHECK-NEXT: [[C4:%.*]] = icmp ult i16 12, [[I6]]
+; CHECK-NEXT: ret i1 [[C4]]
+; CHECK: else:
+; CHECK-NEXT: ret i1 false
+;
+entry:
+ %c1 = icmp ult i16 %i1, -1
+ %c2 = icmp uge i16 %i1, -3
+ %c3 = icmp ult i16 %i3, 2
+ %and = and i1 %c1, %c2
+ %and.2 = and i1 %and, %c3
+ br i1 %and, label %then, label %else
+
+then:
+ %sub = sub nuw nsw i16 %i1, -3
+ %arrayidx.idx = mul nuw nsw i16 %i3, 4
+ %i6 = add nuw nsw i16 %arrayidx.idx, %sub
+ %c4 = icmp ult i16 12, %i6
+ ret i1 %c4
+
+else:
+ ret i1 0
+}
+
+define void @sub_nuw_chained_positive_constants(i16 %a) {
+; CHECK-LABEL: @sub_nuw_chained_positive_constants(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SUB1:%.*]] = sub nuw i16 [[A:%.*]], 10
+; CHECK-NEXT: [[SUB2:%.*]] = sub nuw i16 [[SUB1]], 20
+; CHECK-NEXT: [[C_1:%.*]] = icmp ugt i16 [[SUB2]], 90
+; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_1:%.*]], label [[EXIT_2:%.*]]
+; CHECK: exit.1:
+; CHECK-NEXT: call void @use(i1 true)
+; CHECK-NEXT: [[C_3:%.*]] = icmp ugt i16 [[A]], 121
+; CHECK-NEXT: call void @use(i1 [[C_3]])
+; CHECK-NEXT: ret void
+; CHECK: exit.2:
+; CHECK-NEXT: call void @use(i1 false)
+; CHECK-NEXT: call void @use(i1 false)
+; CHECK-NEXT: ret void
+;
+entry:
+ %sub1 = sub nuw i16 %a, 10
+ %sub2 = sub nuw i16 %sub1, 20
+ %c.1 = icmp ugt i16 %sub2, 90
+ br i1 %c.1, label %exit.1, label %exit.2
+
+exit.1:
+ %c.2 = icmp ugt i16 %a, 120
+ call void @use(i1 %c.2)
+ %c.3 = icmp ugt i16 %a, 121
+ call void @use(i1 %c.3)
+ ret void
+
+exit.2:
+ %c.4 = icmp ugt i16 %a, 120
+ call void @use(i1 %c.4)
+ %c.5 = icmp ugt i16 %a, 121
+ call void @use(i1 %c.5)
+ ret void
+}
+
+define void @sub_nuw_chained_negative_constants(i8 %a) {
+; CHECK-LABEL: @sub_nuw_chained_negative_constants(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SUB1:%.*]] = sub nuw i8 [[A:%.*]], 10
+; CHECK-NEXT: [[SUB2:%.*]] = sub nuw i8 [[SUB1]], -126
+; CHECK-NEXT: [[C_1:%.*]] = icmp ugt i8 [[SUB2]], 20
+; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_1:%.*]], label [[EXIT_2:%.*]]
+; CHECK: exit.1:
+; CHECK-NEXT: call void @use(i1 true)
+; CHECK-NEXT: [[C_3:%.*]] = icmp ugt i8 [[A]], -95
+; CHECK-NEXT: call void @use(i1 [[C_3]])
+; CHECK-NEXT: ret void
+; CHECK: exit.2:
+; CHECK-NEXT: call void @use(i1 false)
+; CHECK-NEXT: call void @use(i1 false)
+; CHECK-NEXT: ret void
+;
+entry:
+ %sub1 = sub nuw i8 %a, 10
+ %sub2 = sub nuw i8 %sub1, 130
+ %c.1 = icmp ugt i8 %sub2, 20
+ br i1 %c.1, label %exit.1, label %exit.2
+
+exit.1:
+ %c.2 = icmp ugt i8 %a, 160
+ call void @use(i1 %c.2)
+ %c.3 = icmp ugt i8 %a, 161
+ call void @use(i1 %c.3)
+ ret void
+
+
+exit.2:
+ %c.4 = icmp ugt i8 %a, 160
+ call void @use(i1 %c.4)
+ %c.5 = icmp ugt i8 %a, 161
+ call void @use(i1 %c.5)
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/and-xor-merge.ll b/llvm/test/Transforms/InstCombine/and-xor-merge.ll
index 5433364..e6df4e3 100644
--- a/llvm/test/Transforms/InstCombine/and-xor-merge.ll
+++ b/llvm/test/Transforms/InstCombine/and-xor-merge.ll
@@ -40,3 +40,42 @@ define i32 @PR38781(i32 %a, i32 %b) {
%and = and i32 %b.lobit.not, %a.lobit.not
ret i32 %and
}
+
+; (a ^ 4) & (a ^ ~4) -> 0
+define i32 @PR75692_1(i32 %x) {
+; CHECK-LABEL: @PR75692_1
+; CHECK-NEXT: ret i32 0
+;
+ %t2 = xor i32 %x, 4
+ %t3 = xor i32 %x, -5
+ %t4 = and i32 %t2, %t3
+ ret i32 %t4
+}
+
+; (a ^ 4) & (a ^ 3) is not zero
+define i32 @PR75692_2(i32 %x) {
+; CHECK-LABEL: @PR75692_2
+; CHECK-NEXT: %t2 = xor i32 %x, 4
+; CHECK-NEXT: %t3 = xor i32 %x, -4
+; CHECK-NEXT: %t4 = and i32 %t2, %t3
+; CHECK-NEXT: ret i32 %t4
+;
+ %t2 = xor i32 %x, 4
+ %t3 = xor i32 %x, -4
+ %t4 = and i32 %t2, %t3
+ ret i32 %t4
+}
+
+; (a ^ 4) & (b ^ ~4) is not zero, since a != b is possible
+define i32 @PR75692_3(i32 %x, i32 %y) {
+; CHECK-LABEL: @PR75692_3
+; CHECK-NEXT: %t2 = xor i32 %x, 4
+; CHECK-NEXT: %t3 = xor i32 %y, -5
+; CHECK-NEXT: %t4 = and i32 %t2, %t3
+; CHECK-NEXT: ret i32 %t4
+;
+ %t2 = xor i32 %x, 4
+ %t3 = xor i32 %y, -5
+ %t4 = and i32 %t2, %t3
+ ret i32 %t4
+}
diff --git a/llvm/test/Transforms/InstCombine/or-xor.ll b/llvm/test/Transforms/InstCombine/or-xor.ll
index 1d35332..361aab6 100644
--- a/llvm/test/Transforms/InstCombine/or-xor.ll
+++ b/llvm/test/Transforms/InstCombine/or-xor.ll
@@ -1055,3 +1055,42 @@ define i8 @or_nand_xor_common_op_commute3_use3(i8 %x, i8 %y, i8 %z) {
%r = or i8 %xor, %nand
ret i8 %r
}
+
+; (a ^ 4) & (a ^ ~4) -> -1
+define i32 @PR75692_1(i32 %x) {
+; CHECK-LABEL: @PR75692_1(
+; CHECK-NEXT: ret i32 -1
+;
+ %t2 = xor i32 %x, 4
+ %t3 = xor i32 %x, -5
+ %t4 = or i32 %t2, %t3
+ ret i32 %t4
+}
+
+; (a ^ 4) & (a ^ 3) is not -1
+define i32 @PR75692_2(i32 %x) {
+; CHECK-LABEL: @PR75692_2
+; CHECK-NEXT: %t2 = xor i32 %x, 4
+; CHECK-NEXT: %t3 = xor i32 %x, -4
+; CHECK-NEXT: %t4 = or i32 %t2, %t3
+; CHECK-NEXT: ret i32 %t4
+;
+ %t2 = xor i32 %x, 4
+ %t3 = xor i32 %x, -4
+ %t4 = or i32 %t2, %t3
+ ret i32 %t4
+}
+
+; (a ^ 4) & (b ^ ~4) is not -1, since a != b is possible
+define i32 @PR75692_3(i32 %x, i32 %y) {
+; CHECK-LABEL: @PR75692_3
+; CHECK-NEXT: %t2 = xor i32 %x, 4
+; CHECK-NEXT: %t3 = xor i32 %y, -5
+; CHECK-NEXT: %t4 = or i32 %t2, %t3
+; CHECK-NEXT: ret i32 %t4
+;
+ %t2 = xor i32 %x, 4
+ %t3 = xor i32 %y, -5
+ %t4 = or i32 %t2, %t3
+ ret i32 %t4
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather_extract_from_vectorbuild.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather_extract_from_vectorbuild.ll
index 3bccfac..b762c3a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gather_extract_from_vectorbuild.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gather_extract_from_vectorbuild.ll
@@ -31,3 +31,30 @@ loop:
%i4 = extractelement <2 x float> %ins1, i64 0
br label %loop
}
+
+define void @test1() {
+; CHECK-LABEL: define void @test1() {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[TMP2:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> zeroinitializer, [[TMP0]]
+; CHECK-NEXT: [[TMP2]] = select <2 x i1> zeroinitializer, <2 x float> [[TMP1]], <2 x float> zeroinitializer
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %ph0 = phi float [ 0.000000e+00, %entry ], [ %i4, %loop ]
+ %ph1 = phi float [ 0.000000e+00, %entry ], [ %i5, %loop ]
+ %i = fadd float 0.000000e+00, %ph0
+ %i1 = fadd float 0.000000e+00, %ph1
+ %i2 = select i1 false, float %i, float 0.000000e+00
+ %i3 = select i1 false, float %i1, float 0.000000e+00
+ %ins0 = insertelement <2 x float> zeroinitializer, float %i2, i64 0
+ %ins1 = insertelement <2 x float> %ins0, float %i3, i64 1
+ %i4 = extractelement <2 x float> %ins1, i64 0
+ %i5 = extractelement <2 x float> %ins1, i64 1
+ br label %loop
+}
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-dead-default-lookup-table.ll b/llvm/test/Transforms/SimplifyCFG/switch-dead-default-lookup-table.ll
new file mode 100644
index 0000000..bead0dc
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/switch-dead-default-lookup-table.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt %s -S -passes='simplifycfg<switch-to-lookup>' -simplifycfg-require-and-preserve-domtree=1 -switch-range-to-icmp | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+define i64 @test_1(i64 %0) {
+; CHECK-LABEL: define i64 @test_1(
+; CHECK-SAME: i64 [[TMP0:%.*]]) {
+; CHECK-NEXT: switch.lookup:
+; CHECK-NEXT: [[TMP1:%.*]] = urem i64 [[TMP0]], 4
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i64], ptr @switch.table.test_1, i32 0, i64 [[TMP1]]
+; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i64, ptr [[SWITCH_GEP]], align 8
+; CHECK-NEXT: ret i64 [[SWITCH_LOAD]]
+;
+ %2 = urem i64 %0, 4
+ switch i64 %2, label %5 [
+ i64 1, label %3
+ i64 2, label %3
+ i64 3, label %4
+ ]
+
+3:
+ br label %5
+
+4:
+ br label %5
+
+5:
+ %.0 = phi i64 [ 2, %4 ], [ 1, %3 ], [ 0, %1 ]
+ ret i64 %.0
+}
+
+
+define i64 @test_2(i64 %0) {
+; CHECK-LABEL: define i64 @test_2(
+; CHECK-SAME: i64 [[TMP0:%.*]]) {
+; CHECK-NEXT: switch.lookup:
+; CHECK-NEXT: [[TMP1:%.*]] = urem i64 [[TMP0]], 4
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %2 = urem i64 %0, 4
+ switch i64 %2, label %6 [
+ i64 1, label %3
+ i64 2, label %4
+ i64 3, label %5
+ ]
+
+3:
+ br label %6
+
+4:
+ br label %6
+
+5:
+ br label %6
+
+6:
+ %.0 = phi i64 [ 0, %1 ], [ 1, %3 ], [ 2, %4 ], [ 3, %5 ]
+ ret i64 %.0
+}
+
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll b/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll
index 7c0d5e4..e30a535 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll
@@ -79,15 +79,15 @@ default:
ret void
}
-; This one is a negative test - we know the value of the default,
-; but that's about it
+; We can replace the default branch with case 3 since it is the only case that is missing.
define void @test3(i2 %a) {
; CHECK-LABEL: define void @test3(
; CHECK-SAME: i2 [[A:%.*]]) {
-; CHECK-NEXT: switch i2 [[A]], label [[DEFAULT:%.*]] [
+; CHECK-NEXT: switch i2 [[A]], label [[DOTUNREACHABLEDEFAULT:%.*]] [
; CHECK-NEXT: i2 0, label [[CASE0:%.*]]
; CHECK-NEXT: i2 1, label [[CASE1:%.*]]
; CHECK-NEXT: i2 -2, label [[CASE2:%.*]]
+; CHECK-NEXT: i2 -1, label [[DEFAULT:%.*]]
; CHECK-NEXT: ]
; CHECK: common.ret:
; CHECK-NEXT: ret void
@@ -100,6 +100,8 @@ define void @test3(i2 %a) {
; CHECK: case2:
; CHECK-NEXT: call void @foo(i32 2)
; CHECK-NEXT: br label [[COMMON_RET]]
+; CHECK: .unreachabledefault:
+; CHECK-NEXT: unreachable
; CHECK: default:
; CHECK-NEXT: call void @foo(i32 3)
; CHECK-NEXT: br label [[COMMON_RET]]
@@ -122,6 +124,50 @@ default:
ret void
}
+define void @test3_prof(i2 %a) {
+; CHECK-LABEL: define void @test3_prof(
+; CHECK-SAME: i2 [[A:%.*]]) {
+; CHECK-NEXT: switch i2 [[A]], label [[DOTUNREACHABLEDEFAULT:%.*]] [
+; CHECK-NEXT: i2 0, label [[CASE0:%.*]]
+; CHECK-NEXT: i2 1, label [[CASE1:%.*]]
+; CHECK-NEXT: i2 -2, label [[CASE2:%.*]]
+; CHECK-NEXT: i2 -1, label [[DEFAULT:%.*]]
+; CHECK-NEXT: ], !prof [[PROF0:![0-9]+]]
+; CHECK: common.ret:
+; CHECK-NEXT: ret void
+; CHECK: case0:
+; CHECK-NEXT: call void @foo(i32 0)
+; CHECK-NEXT: br label [[COMMON_RET:%.*]]
+; CHECK: case1:
+; CHECK-NEXT: call void @foo(i32 1)
+; CHECK-NEXT: br label [[COMMON_RET]]
+; CHECK: case2:
+; CHECK-NEXT: call void @foo(i32 2)
+; CHECK-NEXT: br label [[COMMON_RET]]
+; CHECK: .unreachabledefault:
+; CHECK-NEXT: unreachable
+; CHECK: default:
+; CHECK-NEXT: call void @foo(i32 3)
+; CHECK-NEXT: br label [[COMMON_RET]]
+;
+ switch i2 %a, label %default [i2 0, label %case0
+ i2 1, label %case1
+ i2 2, label %case2], !prof !0
+
+case0:
+ call void @foo(i32 0)
+ ret void
+case1:
+ call void @foo(i32 1)
+ ret void
+case2:
+ call void @foo(i32 2)
+ ret void
+default:
+ call void @foo(i32 3)
+ ret void
+}
+
; Negative test - check for possible overflow when computing
; number of possible cases.
define void @test4(i128 %a) {
@@ -267,3 +313,7 @@ default:
declare void @llvm.assume(i1)
+!0 = !{!"branch_weights", i32 8, i32 4, i32 2, i32 1}
+;.
+; CHECK: [[PROF0]] = !{!"branch_weights", i32 0, i32 4, i32 2, i32 1, i32 8}
+;.
diff --git a/llvm/test/Transforms/Util/add-TLI-mappings.ll b/llvm/test/Transforms/Util/add-TLI-mappings.ll
index a407986..67ca00b 100644
--- a/llvm/test/Transforms/Util/add-TLI-mappings.ll
+++ b/llvm/test/Transforms/Util/add-TLI-mappings.ll
@@ -65,6 +65,32 @@ define float @call_llvm.log10.f32(float %in) {
}
declare float @llvm.log10.f32(float) #0
+
+; SVML: declare <2 x double> @__svml_sin2(<2 x double>)
+; SVML: declare <4 x double> @__svml_sin4(<4 x double>)
+; SVML: declare <8 x double> @__svml_sin8(<8 x double>)
+; SVML: declare <4 x float> @__svml_log10f4(<4 x float>)
+; SVML: declare <8 x float> @__svml_log10f8(<8 x float>)
+; SVML: declare <16 x float> @__svml_log10f16(<16 x float>)
+
+; MASSV: declare <2 x double> @__sind2(<2 x double>)
+; MASSV: declare <4 x float> @__log10f4(<4 x float>)
+
+; LIBMVEC-X86: declare <2 x double> @_ZGVbN2v_sin(<2 x double>)
+; LIBMVEC-X86: declare <4 x double> @_ZGVdN4v_sin(<4 x double>)
+
+; ACCELERATE: declare <4 x float> @vlog10f(<4 x float>)
+
+; SLEEFGNUABI: declare <2 x double> @_ZGVnN2v_sin(<2 x double>)
+; SLEEFGNUABI: declare <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double>, <vscale x 2 x i1>)
+; SLEEFGNUABI: declare <4 x float> @_ZGVnN4v_log10f(<4 x float>)
+; SLEEFGNUABI: declare <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float>, <vscale x 4 x i1>)
+
+; ARMPL: declare <2 x double> @armpl_vsinq_f64(<2 x double>)
+; ARMPL: declare <vscale x 2 x double> @armpl_svsin_f64_x(<vscale x 2 x double>, <vscale x 2 x i1>)
+; ARMPL: declare <4 x float> @armpl_vlog10q_f32(<4 x float>)
+; ARMPL: declare <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float>, <vscale x 4 x i1>)
+
attributes #0 = { nounwind readnone }
; SVML: attributes #[[SIN]] = { "vector-function-abi-variant"=
diff --git a/llvm/test/tools/llvm-cxxfilt/no-params.test b/llvm/test/tools/llvm-cxxfilt/no-params.test
deleted file mode 100644
index 17cbbbe..0000000
--- a/llvm/test/tools/llvm-cxxfilt/no-params.test
+++ /dev/null
@@ -1,34 +0,0 @@
-RUN: llvm-cxxfilt _Z3fooILZ3BarEET_f _Z3fooIPFcfEET_d _ZN1f2baC2ERKNS_2baIT_EE _Z3foov.123 | FileCheck %s --check-prefix=CHECK-PARAMS
-RUN: llvm-cxxfilt -p _Z3fooILZ3BarEET_f _Z3fooIPFcfEET_d _ZN1f2baC2ERKNS_2baIT_EE _Z3foov.123 | FileCheck %s --check-prefix=CHECK-NO-PARAMS --match-full-lines
-RUN: llvm-cxxfilt --no-params _Z3fooILZ3BarEET_f _Z3fooIPFcfEET_d _ZN1f2baC2ERKNS_2baIT_EE _Z3foov.123 | FileCheck %s --check-prefix=CHECK-NO-PARAMS --match-full-lines
-
-# Check that -p or --no-params flag omits function parameters and the return
-# type.
-
-CHECK-PARAMS: Bar foo<Bar>(float)
-CHECK-NO-PARAMS: foo<Bar>
-
-# Check that only the top-level function is impacted by the switch, and that
-# nested function types in the encoding (e.g. where a function type is being
-# used as a template parameter) still include their parameters.
-#
-# template <typename T> T foo(double);
-# typedef char (*F)(float);
-# F foo<F>(double)
-
-CHECK-PARAMS: char (*foo<char (*)(float)>(double))(float)
-CHECK-NO-PARAMS: foo<char (*)(float)>
-
-# Use an invalid mangled name broken in the function parameters to check how -p
-# or --no-params flag works. If the option is given we should be able to
-# demangle the function name just fine. If it is not given, demangling will fail
-# because of the invalid params.
-
-CHECK-PARAMS: _ZN1f2baC2ERKNS_2baIT_EE
-CHECK-NO-PARAMS: f::ba::ba
-
-# Check that a vendor specific suffix is also omitted when --no-params is
-# specified. This matches c++filt's behaviour.
-
-CHECK-PARAMS: foo() (.123)
-CHECK-NO-PARAMS: foo