aboutsummaryrefslogtreecommitdiff
path: root/llvm/test
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll4
-rw-r--r--llvm/test/CodeGen/Hexagon/fmul-v67.ll8
-rw-r--r--llvm/test/CodeGen/NVPTX/tcgen05-alloc.ll149
-rw-r--r--llvm/test/CodeGen/NVPTX/tcgen05-commit.ll152
-rw-r--r--llvm/test/CodeGen/NVPTX/tcgen05-cp.ll450
-rw-r--r--llvm/test/CodeGen/NVPTX/tcgen05-shift.ll21
-rw-r--r--llvm/test/CodeGen/RISCV/rv64zbkb.ll6
-rw-r--r--llvm/test/Instrumentation/AddressSanitizer/alloca-offset-lifetime.ll27
-rw-r--r--llvm/test/Instrumentation/AddressSanitizer/calls-only-smallfn.ll18
-rw-r--r--llvm/test/Instrumentation/AddressSanitizer/calls-only.ll42
-rw-r--r--llvm/test/Instrumentation/SanitizerCoverage/missing_dbg.ll92
-rw-r--r--llvm/test/Transforms/Coroutines/coro-elide-safe.ll (renamed from llvm/test/Transforms/Coroutines/coro-transform-must-elide.ll)28
-rw-r--r--llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll32
-rw-r--r--llvm/test/Transforms/DFAJumpThreading/max-path-length.ll6
-rw-r--r--llvm/test/Transforms/GVN/assume-equal.ll49
-rw-r--r--llvm/test/Transforms/InstCombine/ptrtoaddr.ll77
-rw-r--r--llvm/test/Transforms/InstSimplify/ptr_diff.ll59
-rw-r--r--llvm/test/Transforms/LICM/vector-intrinsics.ll176
-rw-r--r--llvm/test/Transforms/LoopVectorize/RISCV/veclib-function-calls.ll2
-rw-r--r--llvm/test/Transforms/LoopVectorize/single_early_exit.ll47
-rw-r--r--llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll1
-rw-r--r--llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s4
22 files changed, 1118 insertions, 332 deletions
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll
index 8d091a0..d380104 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll
@@ -61,7 +61,7 @@ define void @umin(i32 noundef %a, i32 noundef %b) {
; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a) umin (4 * %b)))
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a) umin (4 * %b)))
-; CHECK-NEXT: Loop %for.body: Trip multiple is 1
+; CHECK-NEXT: Loop %for.body: Trip multiple is 2
;
; void umin(unsigned a, unsigned b) {
; a *= 2;
@@ -157,7 +157,7 @@ define void @smin(i32 noundef %a, i32 noundef %b) {
; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a)<nsw> smin (4 * %b)<nsw>))
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a)<nsw> smin (4 * %b)<nsw>))
-; CHECK-NEXT: Loop %for.body: Trip multiple is 1
+; CHECK-NEXT: Loop %for.body: Trip multiple is 2
;
; void smin(signed a, signed b) {
; a *= 2;
diff --git a/llvm/test/CodeGen/Hexagon/fmul-v67.ll b/llvm/test/CodeGen/Hexagon/fmul-v67.ll
index 49098cd..fc0b7f7 100644
--- a/llvm/test/CodeGen/Hexagon/fmul-v67.ll
+++ b/llvm/test/CodeGen/Hexagon/fmul-v67.ll
@@ -29,7 +29,7 @@ b2:
; CHECK: [[R22]] += dfmpylh([[R20]],[[R21]])
; CHECK: [[R22]] += dfmpylh([[R21]],[[R20]])
; CHECK: [[R22]] += dfmpyhh([[R20]],[[R21]])
-define double @test_02(double %a0, double %a1) #2 {
+define double @test_02(double %a0, double %a1) #1 {
b2:
%v3 = fmul double %a0, %a1
ret double %v3
@@ -40,13 +40,11 @@ b2:
; CHECK: [[R30]] += dfmpylh(r1:0,r3:2)
; CHECK: [[R30]] += dfmpylh(r3:2,r1:0)
; CHECK: [[R30]] += dfmpyhh(r1:0,r3:2)
-define double @test_03(double %a0, double %a1) #3 {
+define double @test_03(double %a0, double %a1) #1 {
b2:
- %v3 = fmul double %a0, %a1
+ %v3 = fmul afn double %a0, %a1
ret double %v3
}
attributes #0 = { nounwind }
attributes #1 = { nounwind "target-cpu"="hexagonv67" }
-attributes #2 = { nounwind "target-cpu"="hexagonv67" "unsafe-fp-math"="false" }
-attributes #3 = { nounwind "target-cpu"="hexagonv67" "unsafe-fp-math"="true" }
diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-alloc.ll b/llvm/test/CodeGen/NVPTX/tcgen05-alloc.ll
index 41a0e81..1edb387 100644
--- a/llvm/test/CodeGen/NVPTX/tcgen05-alloc.ll
+++ b/llvm/test/CodeGen/NVPTX/tcgen05-alloc.ll
@@ -12,63 +12,104 @@ declare void @llvm.nvvm.tcgen05.alloc.cg2(ptr %addr, i32 %ncols)
declare void @llvm.nvvm.tcgen05.alloc.shared.cg1(ptr addrspace(3) %addr, i32 %ncols)
declare void @llvm.nvvm.tcgen05.alloc.shared.cg2(ptr addrspace(3) %addr, i32 %ncols)
-; CHECK-LABEL: test_tcgen05_alloc
-define void @test_tcgen05_alloc(ptr %addr, i32 %ncols) {
-; CHECK_PTX64-LABEL: test_tcgen05_alloc(
+define void @test_tcgen05_alloc_cg1(ptr %addr, i32 %ncols) {
+; CHECK_PTX64-LABEL: test_tcgen05_alloc_cg1(
; CHECK_PTX64: {
; CHECK_PTX64-NEXT: .reg .b32 %r<2>;
; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
; CHECK_PTX64-EMPTY:
; CHECK_PTX64-NEXT: // %bb.0:
-; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_param_0];
-; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_param_1];
+; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg1_param_0];
+; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg1_param_1];
; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.b32 [%rd1], %r1;
-; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1;
; CHECK_PTX64-NEXT: ret;
;
-; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc(
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_cg1(
; CHECK_PTX64_SHARED32: {
; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>;
; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>;
; CHECK_PTX64_SHARED32-EMPTY:
; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
-; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_param_0];
-; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_param_1];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg1_param_0];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg1_param_1];
; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.b32 [%rd1], %r1;
-; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1;
; CHECK_PTX64_SHARED32-NEXT: ret;
call void @llvm.nvvm.tcgen05.alloc.cg1(ptr %addr, i32 %ncols)
- call void @llvm.nvvm.tcgen05.alloc.cg2(ptr %addr, i32 %ncols)
+ ret void
+}
+define void @test_tcgen05_alloc_cg2(ptr %addr, i32 %ncols) {
+; CHECK_PTX64-LABEL: test_tcgen05_alloc_cg2(
+; CHECK_PTX64: {
+; CHECK_PTX64-NEXT: .reg .b32 %r<2>;
+; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
+; CHECK_PTX64-EMPTY:
+; CHECK_PTX64-NEXT: // %bb.0:
+; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg2_param_0];
+; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg2_param_1];
+; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1;
+; CHECK_PTX64-NEXT: ret;
+;
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_cg2(
+; CHECK_PTX64_SHARED32: {
+; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>;
+; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>;
+; CHECK_PTX64_SHARED32-EMPTY:
+; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg2_param_0];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg2_param_1];
+; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1;
+; CHECK_PTX64_SHARED32-NEXT: ret;
+ call void @llvm.nvvm.tcgen05.alloc.cg2(ptr %addr, i32 %ncols)
ret void
}
-; CHECK-LABEL: test_tcgen05_alloc_shared
-define void @test_tcgen05_alloc_shared(ptr addrspace(3) %addr, i32 %ncols) {
-; CHECK_PTX64-LABEL: test_tcgen05_alloc_shared(
+define void @test_tcgen05_alloc_shared_cg1(ptr addrspace(3) %addr, i32 %ncols) {
+; CHECK_PTX64-LABEL: test_tcgen05_alloc_shared_cg1(
; CHECK_PTX64: {
; CHECK_PTX64-NEXT: .reg .b32 %r<2>;
; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
; CHECK_PTX64-EMPTY:
; CHECK_PTX64-NEXT: // %bb.0:
-; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_shared_param_0];
-; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_param_1];
+; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_shared_cg1_param_0];
+; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg1_param_1];
; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.shared::cta.b32 [%rd1], %r1;
-; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%rd1], %r1;
; CHECK_PTX64-NEXT: ret;
;
-; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_shared(
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_shared_cg1(
; CHECK_PTX64_SHARED32: {
; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>;
; CHECK_PTX64_SHARED32-EMPTY:
; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
-; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_param_0];
-; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_alloc_shared_param_1];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg1_param_0];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_alloc_shared_cg1_param_1];
; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.shared::cta.b32 [%r1], %r2;
-; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%r1], %r2;
; CHECK_PTX64_SHARED32-NEXT: ret;
call void @llvm.nvvm.tcgen05.alloc.shared.cg1(ptr addrspace(3) %addr, i32 %ncols)
+ ret void
+}
+define void @test_tcgen05_alloc_shared_cg2(ptr addrspace(3) %addr, i32 %ncols) {
+; CHECK_PTX64-LABEL: test_tcgen05_alloc_shared_cg2(
+; CHECK_PTX64: {
+; CHECK_PTX64-NEXT: .reg .b32 %r<2>;
+; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
+; CHECK_PTX64-EMPTY:
+; CHECK_PTX64-NEXT: // %bb.0:
+; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_shared_cg2_param_0];
+; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg2_param_1];
+; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%rd1], %r1;
+; CHECK_PTX64-NEXT: ret;
+;
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_shared_cg2(
+; CHECK_PTX64_SHARED32: {
+; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>;
+; CHECK_PTX64_SHARED32-EMPTY:
+; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg2_param_0];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_alloc_shared_cg2_param_1];
+; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%r1], %r2;
+; CHECK_PTX64_SHARED32-NEXT: ret;
call void @llvm.nvvm.tcgen05.alloc.shared.cg2(ptr addrspace(3) %addr, i32 %ncols)
ret void
}
@@ -76,31 +117,50 @@ define void @test_tcgen05_alloc_shared(ptr addrspace(3) %addr, i32 %ncols) {
declare void @llvm.nvvm.tcgen05.dealloc.cg1(ptr addrspace(6) %tmem_addr, i32 %ncols)
declare void @llvm.nvvm.tcgen05.dealloc.cg2(ptr addrspace(6) %tmem_addr, i32 %ncols)
-; CHECK-LABEL: test_tcgen05_dealloc
-define void @test_tcgen05_dealloc(ptr addrspace(6) %tmem_addr, i32 %ncols) {
-; CHECK_PTX64-LABEL: test_tcgen05_dealloc(
+define void @test_tcgen05_dealloc_cg1(ptr addrspace(6) %tmem_addr, i32 %ncols) {
+; CHECK_PTX64-LABEL: test_tcgen05_dealloc_cg1(
; CHECK_PTX64: {
; CHECK_PTX64-NEXT: .reg .b32 %r<3>;
; CHECK_PTX64-EMPTY:
; CHECK_PTX64-NEXT: // %bb.0:
-; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_param_0];
-; CHECK_PTX64-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_param_1];
+; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg1_param_0];
+; CHECK_PTX64-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg1_param_1];
; CHECK_PTX64-NEXT: tcgen05.dealloc.cta_group::1.sync.aligned.b32 %r1, %r2;
-; CHECK_PTX64-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2;
; CHECK_PTX64-NEXT: ret;
;
-; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_dealloc(
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_dealloc_cg1(
; CHECK_PTX64_SHARED32: {
; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>;
; CHECK_PTX64_SHARED32-EMPTY:
; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
-; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_param_0];
-; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_param_1];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg1_param_0];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg1_param_1];
; CHECK_PTX64_SHARED32-NEXT: tcgen05.dealloc.cta_group::1.sync.aligned.b32 %r1, %r2;
-; CHECK_PTX64_SHARED32-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2;
; CHECK_PTX64_SHARED32-NEXT: ret;
call void @llvm.nvvm.tcgen05.dealloc.cg1(ptr addrspace(6) %tmem_addr, i32 %ncols)
+ ret void
+}
+define void @test_tcgen05_dealloc_cg2(ptr addrspace(6) %tmem_addr, i32 %ncols) {
+; CHECK_PTX64-LABEL: test_tcgen05_dealloc_cg2(
+; CHECK_PTX64: {
+; CHECK_PTX64-NEXT: .reg .b32 %r<3>;
+; CHECK_PTX64-EMPTY:
+; CHECK_PTX64-NEXT: // %bb.0:
+; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg2_param_0];
+; CHECK_PTX64-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg2_param_1];
+; CHECK_PTX64-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2;
+; CHECK_PTX64-NEXT: ret;
+;
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_dealloc_cg2(
+; CHECK_PTX64_SHARED32: {
+; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>;
+; CHECK_PTX64_SHARED32-EMPTY:
+; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg2_param_0];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg2_param_1];
+; CHECK_PTX64_SHARED32-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2;
+; CHECK_PTX64_SHARED32-NEXT: ret;
call void @llvm.nvvm.tcgen05.dealloc.cg2(ptr addrspace(6) %tmem_addr, i32 %ncols)
ret void
}
@@ -108,27 +168,42 @@ define void @test_tcgen05_dealloc(ptr addrspace(6) %tmem_addr, i32 %ncols) {
declare void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg1()
declare void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg2()
-; CHECK-LABEL: test_tcgen05_relinquish_alloc_permit
-define void @test_tcgen05_relinquish_alloc_permit() {
-; CHECK_PTX64-LABEL: test_tcgen05_relinquish_alloc_permit(
+define void @test_tcgen05_relinquish_alloc_permit_cg1() {
+; CHECK_PTX64-LABEL: test_tcgen05_relinquish_alloc_permit_cg1(
; CHECK_PTX64: {
; CHECK_PTX64-EMPTY:
; CHECK_PTX64-EMPTY:
; CHECK_PTX64-NEXT: // %bb.0:
; CHECK_PTX64-NEXT: tcgen05.relinquish_alloc_permit.cta_group::1.sync.aligned;
-; CHECK_PTX64-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned;
; CHECK_PTX64-NEXT: ret;
;
-; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_relinquish_alloc_permit(
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_relinquish_alloc_permit_cg1(
; CHECK_PTX64_SHARED32: {
; CHECK_PTX64_SHARED32-EMPTY:
; CHECK_PTX64_SHARED32-EMPTY:
; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
; CHECK_PTX64_SHARED32-NEXT: tcgen05.relinquish_alloc_permit.cta_group::1.sync.aligned;
-; CHECK_PTX64_SHARED32-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned;
; CHECK_PTX64_SHARED32-NEXT: ret;
call void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg1()
+ ret void
+}
+define void @test_tcgen05_relinquish_alloc_permit_cg2() {
+; CHECK_PTX64-LABEL: test_tcgen05_relinquish_alloc_permit_cg2(
+; CHECK_PTX64: {
+; CHECK_PTX64-EMPTY:
+; CHECK_PTX64-EMPTY:
+; CHECK_PTX64-NEXT: // %bb.0:
+; CHECK_PTX64-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned;
+; CHECK_PTX64-NEXT: ret;
+;
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_relinquish_alloc_permit_cg2(
+; CHECK_PTX64_SHARED32: {
+; CHECK_PTX64_SHARED32-EMPTY:
+; CHECK_PTX64_SHARED32-EMPTY:
+; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
+; CHECK_PTX64_SHARED32-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned;
+; CHECK_PTX64_SHARED32-NEXT: ret;
call void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg2()
ret void
}
diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-commit.ll b/llvm/test/CodeGen/NVPTX/tcgen05-commit.ll
index 7981feb..2e80c4c 100644
--- a/llvm/test/CodeGen/NVPTX/tcgen05-commit.ll
+++ b/llvm/test/CodeGen/NVPTX/tcgen05-commit.ll
@@ -11,57 +11,93 @@ declare void @llvm.nvvm.tcgen05.commit.cg2(ptr %bar_addr)
declare void @llvm.nvvm.tcgen05.commit.shared.cg1(ptr addrspace(3) %bar_addr)
declare void @llvm.nvvm.tcgen05.commit.shared.cg2(ptr addrspace(3) %bar_addr)
-; CHECK-LABEL: test_tcgen05_commit
-define void @test_tcgen05_commit(ptr %bar_addr) {
-; CHECK_PTX64-LABEL: test_tcgen05_commit(
+define void @test_tcgen05_commit_cg1(ptr %bar_addr) {
+; CHECK_PTX64-LABEL: test_tcgen05_commit_cg1(
; CHECK_PTX64: {
; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
; CHECK_PTX64-EMPTY:
; CHECK_PTX64-NEXT: // %bb.0:
-; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_param_0];
+; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_cg1_param_0];
; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.b64 [%rd1];
-; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%rd1];
; CHECK_PTX64-NEXT: ret;
;
-; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit(
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_cg1(
; CHECK_PTX64_SHARED32: {
; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>;
; CHECK_PTX64_SHARED32-EMPTY:
; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
-; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_param_0];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_cg1_param_0];
; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.b64 [%rd1];
-; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%rd1];
; CHECK_PTX64_SHARED32-NEXT: ret;
call void @llvm.nvvm.tcgen05.commit.cg1(ptr %bar_addr)
+ ret void
+}
+
+define void @test_tcgen05_commit_cg2(ptr %bar_addr) {
+; CHECK_PTX64-LABEL: test_tcgen05_commit_cg2(
+; CHECK_PTX64: {
+; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
+; CHECK_PTX64-EMPTY:
+; CHECK_PTX64-NEXT: // %bb.0:
+; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_cg2_param_0];
+; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%rd1];
+; CHECK_PTX64-NEXT: ret;
+;
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_cg2(
+; CHECK_PTX64_SHARED32: {
+; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>;
+; CHECK_PTX64_SHARED32-EMPTY:
+; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_cg2_param_0];
+; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%rd1];
+; CHECK_PTX64_SHARED32-NEXT: ret;
call void @llvm.nvvm.tcgen05.commit.cg2(ptr %bar_addr)
ret void
}
-; CHECK-LABEL: test_tcgen05_commit_shared
-define void @test_tcgen05_commit_shared(ptr addrspace(3) %bar_addr) {
-; CHECK_PTX64-LABEL: test_tcgen05_commit_shared(
+define void @test_tcgen05_commit_shared_cg1(ptr addrspace(3) %bar_addr) {
+; CHECK_PTX64-LABEL: test_tcgen05_commit_shared_cg1(
; CHECK_PTX64: {
; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
; CHECK_PTX64-EMPTY:
; CHECK_PTX64-NEXT: // %bb.0:
-; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_shared_param_0];
+; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_shared_cg1_param_0];
; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.b64 [%rd1];
-; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%rd1];
; CHECK_PTX64-NEXT: ret;
;
-; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_shared(
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_shared_cg1(
; CHECK_PTX64_SHARED32: {
; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>;
; CHECK_PTX64_SHARED32-EMPTY:
; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
-; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_commit_shared_param_0];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_commit_shared_cg1_param_0];
; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.b64 [%r1];
-; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%r1];
; CHECK_PTX64_SHARED32-NEXT: ret;
call void @llvm.nvvm.tcgen05.commit.shared.cg1(ptr addrspace(3) %bar_addr)
+ ret void
+}
+
+define void @test_tcgen05_commit_shared_cg2(ptr addrspace(3) %bar_addr) {
+; CHECK_PTX64-LABEL: test_tcgen05_commit_shared_cg2(
+; CHECK_PTX64: {
+; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
+; CHECK_PTX64-EMPTY:
+; CHECK_PTX64-NEXT: // %bb.0:
+; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_shared_cg2_param_0];
+; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%rd1];
+; CHECK_PTX64-NEXT: ret;
+;
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_shared_cg2(
+; CHECK_PTX64_SHARED32: {
+; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>;
+; CHECK_PTX64_SHARED32-EMPTY:
+; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_commit_shared_cg2_param_0];
+; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%r1];
+; CHECK_PTX64_SHARED32-NEXT: ret;
call void @llvm.nvvm.tcgen05.commit.shared.cg2(ptr addrspace(3) %bar_addr)
ret void
@@ -72,66 +108,106 @@ declare void @llvm.nvvm.tcgen05.commit.mc.cg2(ptr %bar_addr, i16 %cta_mask)
declare void @llvm.nvvm.tcgen05.commit.mc.shared.cg1(ptr addrspace(3) %bar_addr, i16 %cta_mask)
declare void @llvm.nvvm.tcgen05.commit.mc.shared.cg2(ptr addrspace(3) %bar_addr, i16 %cta_mask)
-; CHECK-LABEL: test_tcgen05_commit_mc
-define void @test_tcgen05_commit_mc(ptr %bar_addr, i16 %cta_mask) {
-; CHECK_PTX64-LABEL: test_tcgen05_commit_mc(
+define void @test_tcgen05_commit_mc_cg1(ptr %bar_addr, i16 %cta_mask) {
+; CHECK_PTX64-LABEL: test_tcgen05_commit_mc_cg1(
; CHECK_PTX64: {
; CHECK_PTX64-NEXT: .reg .b16 %rs<2>;
; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
; CHECK_PTX64-EMPTY:
; CHECK_PTX64-NEXT: // %bb.0:
-; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_param_0];
-; CHECK_PTX64-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_param_1];
+; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_cg1_param_0];
+; CHECK_PTX64-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_cg1_param_1];
; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1;
-; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1;
; CHECK_PTX64-NEXT: ret;
;
-; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_mc(
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_mc_cg1(
; CHECK_PTX64_SHARED32: {
; CHECK_PTX64_SHARED32-NEXT: .reg .b16 %rs<2>;
; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>;
; CHECK_PTX64_SHARED32-EMPTY:
; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
-; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_param_0];
-; CHECK_PTX64_SHARED32-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_param_1];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_cg1_param_0];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_cg1_param_1];
; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1;
-; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1;
; CHECK_PTX64_SHARED32-NEXT: ret;
call void @llvm.nvvm.tcgen05.commit.mc.cg1(ptr %bar_addr, i16 %cta_mask)
+ ret void
+}
+define void @test_tcgen05_commit_mc_cg2(ptr %bar_addr, i16 %cta_mask) {
+; CHECK_PTX64-LABEL: test_tcgen05_commit_mc_cg2(
+; CHECK_PTX64: {
+; CHECK_PTX64-NEXT: .reg .b16 %rs<2>;
+; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
+; CHECK_PTX64-EMPTY:
+; CHECK_PTX64-NEXT: // %bb.0:
+; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_cg2_param_0];
+; CHECK_PTX64-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_cg2_param_1];
+; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1;
+; CHECK_PTX64-NEXT: ret;
+;
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_mc_cg2(
+; CHECK_PTX64_SHARED32: {
+; CHECK_PTX64_SHARED32-NEXT: .reg .b16 %rs<2>;
+; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>;
+; CHECK_PTX64_SHARED32-EMPTY:
+; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_cg2_param_0];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_cg2_param_1];
+; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1;
+; CHECK_PTX64_SHARED32-NEXT: ret;
call void @llvm.nvvm.tcgen05.commit.mc.cg2(ptr %bar_addr, i16 %cta_mask)
-
ret void
}
-; CHECK-LABEL: test_tcgen05_commit_mc_shared
-define void @test_tcgen05_commit_mc_shared(ptr addrspace(3) %bar_addr, i16 %cta_mask) {
-; CHECK_PTX64-LABEL: test_tcgen05_commit_mc_shared(
+define void @test_tcgen05_commit_mc_shared_cg1(ptr addrspace(3) %bar_addr, i16 %cta_mask) {
+; CHECK_PTX64-LABEL: test_tcgen05_commit_mc_shared_cg1(
; CHECK_PTX64: {
; CHECK_PTX64-NEXT: .reg .b16 %rs<2>;
; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
; CHECK_PTX64-EMPTY:
; CHECK_PTX64-NEXT: // %bb.0:
-; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_shared_param_0];
-; CHECK_PTX64-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_shared_param_1];
+; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_shared_cg1_param_0];
+; CHECK_PTX64-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_shared_cg1_param_1];
; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1;
-; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1;
; CHECK_PTX64-NEXT: ret;
;
-; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_mc_shared(
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_mc_shared_cg1(
; CHECK_PTX64_SHARED32: {
; CHECK_PTX64_SHARED32-NEXT: .reg .b16 %rs<2>;
; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>;
; CHECK_PTX64_SHARED32-EMPTY:
; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
-; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_commit_mc_shared_param_0];
-; CHECK_PTX64_SHARED32-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_shared_param_1];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_commit_mc_shared_cg1_param_0];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_shared_cg1_param_1];
; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%r1], %rs1;
-; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%r1], %rs1;
; CHECK_PTX64_SHARED32-NEXT: ret;
call void @llvm.nvvm.tcgen05.commit.mc.shared.cg1(ptr addrspace(3) %bar_addr, i16 %cta_mask)
+ ret void
+}
+define void @test_tcgen05_commit_mc_shared_cg2(ptr addrspace(3) %bar_addr, i16 %cta_mask) {
+; CHECK_PTX64-LABEL: test_tcgen05_commit_mc_shared_cg2(
+; CHECK_PTX64: {
+; CHECK_PTX64-NEXT: .reg .b16 %rs<2>;
+; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
+; CHECK_PTX64-EMPTY:
+; CHECK_PTX64-NEXT: // %bb.0:
+; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_shared_cg2_param_0];
+; CHECK_PTX64-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_shared_cg2_param_1];
+; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1;
+; CHECK_PTX64-NEXT: ret;
+;
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_mc_shared_cg2(
+; CHECK_PTX64_SHARED32: {
+; CHECK_PTX64_SHARED32-NEXT: .reg .b16 %rs<2>;
+; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>;
+; CHECK_PTX64_SHARED32-EMPTY:
+; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_commit_mc_shared_cg2_param_0];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_shared_cg2_param_1];
+; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%r1], %rs1;
+; CHECK_PTX64_SHARED32-NEXT: ret;
call void @llvm.nvvm.tcgen05.commit.mc.shared.cg2(ptr addrspace(3) %bar_addr, i16 %cta_mask)
-
ret void
}
diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-cp.ll b/llvm/test/CodeGen/NVPTX/tcgen05-cp.ll
index c540f78..817b1d5 100644
--- a/llvm/test/CodeGen/NVPTX/tcgen05-cp.ll
+++ b/llvm/test/CodeGen/NVPTX/tcgen05-cp.ll
@@ -4,346 +4,580 @@
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | %ptxas-verify -arch=sm_100a %}
; RUN: %if ptxas-sm_103a && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | %ptxas-verify -arch=sm_103a %}
-; CHECK-LABEL: test_tcgen05_cp_64x128_v1
-define void @test_tcgen05_cp_64x128_v1(ptr addrspace(6) %addr, i64 %sdesc) {
-; CHECK-LABEL: test_tcgen05_cp_64x128_v1(
+define void @test_tcgen05_cp_64x128_v1_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_64x128_v1_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_param_0];
-; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_param_1];
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_cg1_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.64x128b.warpx2::02_13 [%r1], %rd1;
-; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::02_13 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_02_13.cg1(ptr addrspace(6) %addr, i64 %sdesc)
+
+ ret void
+}
+
+define void @test_tcgen05_cp_64x128_v1_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_64x128_v1_cg2(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_cg2_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_cg2_param_1];
+; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::02_13 [%r1], %rd1;
+; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_02_13.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
-; CHECK-LABEL: test_tcgen05_cp_64x128_v2
-define void @test_tcgen05_cp_64x128_v2(ptr addrspace(6) %addr, i64 %sdesc) {
-; CHECK-LABEL: test_tcgen05_cp_64x128_v2(
+define void @test_tcgen05_cp_64x128_v2_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_64x128_v2_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_param_0];
-; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_param_1];
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_cg1_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.64x128b.warpx2::01_23 [%r1], %rd1;
-; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::01_23 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_01_23.cg1(ptr addrspace(6) %addr, i64 %sdesc)
+
+ ret void
+}
+
+define void @test_tcgen05_cp_64x128_v2_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_64x128_v2_cg2(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_cg2_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_cg2_param_1];
+; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::01_23 [%r1], %rd1;
+; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_01_23.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
-; CHECK-LABEL: test_tcgen05_cp_32x128
-define void @test_tcgen05_cp_32x128(ptr addrspace(6) %addr, i64 %sdesc) {
-; CHECK-LABEL: test_tcgen05_cp_32x128(
+define void @test_tcgen05_cp_32x128_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_32x128_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_param_0];
-; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_param_1];
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_cg1_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.32x128b.warpx4 [%r1], %rd1;
-; CHECK-NEXT: tcgen05.cp.cta_group::2.32x128b.warpx4 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.cg1(ptr addrspace(6) %addr, i64 %sdesc)
+
+ ret void
+}
+
+define void @test_tcgen05_cp_32x128_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_32x128_cg2(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_cg2_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_cg2_param_1];
+; CHECK-NEXT: tcgen05.cp.cta_group::2.32x128b.warpx4 [%r1], %rd1;
+; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
-; CHECK-LABEL: test_tcgen05_cp_128x128b
-define void @test_tcgen05_cp_128x128b(ptr addrspace(6) %addr, i64 %sdesc) {
-; CHECK-LABEL: test_tcgen05_cp_128x128b(
+define void @test_tcgen05_cp_128x128b_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_128x128b_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_param_0];
-; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_param_1];
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_cg1_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.128x128b [%r1], %rd1;
-; CHECK-NEXT: tcgen05.cp.cta_group::2.128x128b [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.128x128b.cg1(ptr addrspace(6) %addr, i64 %sdesc)
+
+ ret void
+}
+
+define void @test_tcgen05_cp_128x128b_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_128x128b_cg2(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_cg2_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_cg2_param_1];
+; CHECK-NEXT: tcgen05.cp.cta_group::2.128x128b [%r1], %rd1;
+; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.128x128b.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
-; CHECK-LABEL: test_tcgen05_cp_128x256b
-define void @test_tcgen05_cp_128x256b(ptr addrspace(6) %addr, i64 %sdesc) {
-; CHECK-LABEL: test_tcgen05_cp_128x256b(
+define void @test_tcgen05_cp_128x256b_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_128x256b_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_param_0];
-; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_param_1];
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_cg1_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.128x256b [%r1], %rd1;
-; CHECK-NEXT: tcgen05.cp.cta_group::2.128x256b [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.128x256b.cg1(ptr addrspace(6) %addr, i64 %sdesc)
+
+ ret void
+}
+
+define void @test_tcgen05_cp_128x256b_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_128x256b_cg2(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_cg2_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_cg2_param_1];
+; CHECK-NEXT: tcgen05.cp.cta_group::2.128x256b [%r1], %rd1;
+; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.128x256b.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
-; CHECK-LABEL: test_tcgen05_cp_4x256b
-define void @test_tcgen05_cp_4x256b(ptr addrspace(6) %addr, i64 %sdesc) {
-; CHECK-LABEL: test_tcgen05_cp_4x256b(
+define void @test_tcgen05_cp_4x256b_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_4x256b_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_param_0];
-; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_param_1];
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_cg1_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.4x256b [%r1], %rd1;
-; CHECK-NEXT: tcgen05.cp.cta_group::2.4x256b [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.4x256b.cg1(ptr addrspace(6) %addr, i64 %sdesc)
+
+ ret void
+}
+
+define void @test_tcgen05_cp_4x256b_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_4x256b_cg2(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_cg2_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_cg2_param_1];
+; CHECK-NEXT: tcgen05.cp.cta_group::2.4x256b [%r1], %rd1;
+; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.4x256b.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
; With src_fmt as b6x16_p32
-; CHECK-LABEL: test_tcgen05_cp_128x256b_b6x16_p32
-define void @test_tcgen05_cp_128x256b_b6x16_p32(ptr addrspace(6) %addr, i64 %sdesc) {
-; CHECK-LABEL: test_tcgen05_cp_128x256b_b6x16_p32(
+define void @test_tcgen05_cp_128x256b_b6x16_p32_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_128x256b_b6x16_p32_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_b6x16_p32_param_0];
-; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_b6x16_p32_param_1];
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_b6x16_p32_cg1_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_b6x16_p32_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.128x256b.b8x16.b6x16_p32 [%r1], %rd1;
-; CHECK-NEXT: tcgen05.cp.cta_group::2.128x256b.b8x16.b6x16_p32 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.128x256b.b6x16_p32.cg1(ptr addrspace(6) %addr, i64 %sdesc)
+
+ ret void
+}
+
+define void @test_tcgen05_cp_128x256b_b6x16_p32_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_128x256b_b6x16_p32_cg2(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_b6x16_p32_cg2_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_b6x16_p32_cg2_param_1];
+; CHECK-NEXT: tcgen05.cp.cta_group::2.128x256b.b8x16.b6x16_p32 [%r1], %rd1;
+; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.128x256b.b6x16_p32.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
-; CHECK-LABEL: test_tcgen05_cp_4x256b_b6x16_p32
-define void @test_tcgen05_cp_4x256b_b6x16_p32(ptr addrspace(6) %addr, i64 %sdesc) {
-; CHECK-LABEL: test_tcgen05_cp_4x256b_b6x16_p32(
+define void @test_tcgen05_cp_4x256b_b6x16_p32_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_4x256b_b6x16_p32_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_b6x16_p32_param_0];
-; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_b6x16_p32_param_1];
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_b6x16_p32_cg1_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_b6x16_p32_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.4x256b.b8x16.b6x16_p32 [%r1], %rd1;
-; CHECK-NEXT: tcgen05.cp.cta_group::2.4x256b.b8x16.b6x16_p32 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.4x256b.b6x16_p32.cg1(ptr addrspace(6) %addr, i64 %sdesc)
+
+ ret void
+}
+
+define void @test_tcgen05_cp_4x256b_b6x16_p32_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_4x256b_b6x16_p32_cg2(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_b6x16_p32_cg2_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_b6x16_p32_cg2_param_1];
+; CHECK-NEXT: tcgen05.cp.cta_group::2.4x256b.b8x16.b6x16_p32 [%r1], %rd1;
+; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.4x256b.b6x16_p32.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
-; CHECK-LABEL: test_tcgen05_cp_128x128b_b6x16_p32
-define void @test_tcgen05_cp_128x128b_b6x16_p32(ptr addrspace(6) %addr, i64 %sdesc) {
-; CHECK-LABEL: test_tcgen05_cp_128x128b_b6x16_p32(
+define void @test_tcgen05_cp_128x128b_b6x16_p32_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_128x128b_b6x16_p32_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_b6x16_p32_param_0];
-; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_b6x16_p32_param_1];
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_b6x16_p32_cg1_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_b6x16_p32_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.128x128b.b8x16.b6x16_p32 [%r1], %rd1;
-; CHECK-NEXT: tcgen05.cp.cta_group::2.128x128b.b8x16.b6x16_p32 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.128x128b.b6x16_p32.cg1(ptr addrspace(6) %addr, i64 %sdesc)
+
+ ret void
+}
+
+define void @test_tcgen05_cp_128x128b_b6x16_p32_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_128x128b_b6x16_p32_cg2(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_b6x16_p32_cg2_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_b6x16_p32_cg2_param_1];
+; CHECK-NEXT: tcgen05.cp.cta_group::2.128x128b.b8x16.b6x16_p32 [%r1], %rd1;
+; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.128x128b.b6x16_p32.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
-; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b6x16_p32
-define void @test_tcgen05_cp_64x128_v1_b6x16_p32(ptr addrspace(6) %addr, i64 %sdesc) {
-; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b6x16_p32(
+define void @test_tcgen05_cp_64x128_v1_b6x16_p32_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b6x16_p32_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_b6x16_p32_param_0];
-; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_b6x16_p32_param_1];
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_b6x16_p32_cg1_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_b6x16_p32_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.64x128b.warpx2::02_13.b8x16.b6x16_p32 [%r1], %rd1;
-; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::02_13.b8x16.b6x16_p32 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_02_13.b6x16_p32.cg1(ptr addrspace(6) %addr, i64 %sdesc)
+
+ ret void
+}
+
+define void @test_tcgen05_cp_64x128_v1_b6x16_p32_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b6x16_p32_cg2(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_b6x16_p32_cg2_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_b6x16_p32_cg2_param_1];
+; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::02_13.b8x16.b6x16_p32 [%r1], %rd1;
+; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_02_13.b6x16_p32.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
-; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b6x16_p32
-define void @test_tcgen05_cp_64x128_v2_b6x16_p32(ptr addrspace(6) %addr, i64 %sdesc) {
-; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b6x16_p32(
+define void @test_tcgen05_cp_64x128_v2_b6x16_p32_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b6x16_p32_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_b6x16_p32_param_0];
-; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_b6x16_p32_param_1];
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_b6x16_p32_cg1_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_b6x16_p32_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.64x128b.warpx2::01_23.b8x16.b6x16_p32 [%r1], %rd1;
-; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::01_23.b8x16.b6x16_p32 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_01_23.b6x16_p32.cg1(ptr addrspace(6) %addr, i64 %sdesc)
+
+ ret void
+}
+
+define void @test_tcgen05_cp_64x128_v2_b6x16_p32_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b6x16_p32_cg2(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_b6x16_p32_cg2_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_b6x16_p32_cg2_param_1];
+; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::01_23.b8x16.b6x16_p32 [%r1], %rd1;
+; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_01_23.b6x16_p32.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
-; CHECK-LABEL: test_tcgen05_cp_32x128_b6x16_p32
-define void @test_tcgen05_cp_32x128_b6x16_p32(ptr addrspace(6) %addr, i64 %sdesc) {
-; CHECK-LABEL: test_tcgen05_cp_32x128_b6x16_p32(
+define void @test_tcgen05_cp_32x128_b6x16_p32_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_32x128_b6x16_p32_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_b6x16_p32_param_0];
-; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_b6x16_p32_param_1];
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_b6x16_p32_cg1_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_b6x16_p32_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.32x128b.warpx4.b8x16.b6x16_p32 [%r1], %rd1;
-; CHECK-NEXT: tcgen05.cp.cta_group::2.32x128b.warpx4.b8x16.b6x16_p32 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.b6x16_p32.cg1(ptr addrspace(6) %addr, i64 %sdesc)
+
+ ret void
+}
+
+define void @test_tcgen05_cp_32x128_b6x16_p32_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_32x128_b6x16_p32_cg2(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_b6x16_p32_cg2_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_b6x16_p32_cg2_param_1];
+; CHECK-NEXT: tcgen05.cp.cta_group::2.32x128b.warpx4.b8x16.b6x16_p32 [%r1], %rd1;
+; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.b6x16_p32.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
; With src_fmt as b4x16_p64
-; CHECK-LABEL: test_tcgen05_cp_128x256b_b4x16_p64
-define void @test_tcgen05_cp_128x256b_b4x16_p64(ptr addrspace(6) %addr, i64 %sdesc) {
-; CHECK-LABEL: test_tcgen05_cp_128x256b_b4x16_p64(
+define void @test_tcgen05_cp_128x256b_b4x16_p64_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_128x256b_b4x16_p64_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_b4x16_p64_param_0];
-; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_b4x16_p64_param_1];
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_b4x16_p64_cg1_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_b4x16_p64_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.128x256b.b8x16.b4x16_p64 [%r1], %rd1;
-; CHECK-NEXT: tcgen05.cp.cta_group::2.128x256b.b8x16.b4x16_p64 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.128x256b.b4x16_p64.cg1(ptr addrspace(6) %addr, i64 %sdesc)
+
+ ret void
+}
+
+define void @test_tcgen05_cp_128x256b_b4x16_p64_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_128x256b_b4x16_p64_cg2(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_b4x16_p64_cg2_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_b4x16_p64_cg2_param_1];
+; CHECK-NEXT: tcgen05.cp.cta_group::2.128x256b.b8x16.b4x16_p64 [%r1], %rd1;
+; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.128x256b.b4x16_p64.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
-; CHECK-LABEL: test_tcgen05_cp_4x256b_b4x16_p64
-define void @test_tcgen05_cp_4x256b_b4x16_p64(ptr addrspace(6) %addr, i64 %sdesc) {
-; CHECK-LABEL: test_tcgen05_cp_4x256b_b4x16_p64(
+define void @test_tcgen05_cp_4x256b_b4x16_p64_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_4x256b_b4x16_p64_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_b4x16_p64_param_0];
-; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_b4x16_p64_param_1];
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_b4x16_p64_cg1_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_b4x16_p64_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.4x256b.b8x16.b4x16_p64 [%r1], %rd1;
-; CHECK-NEXT: tcgen05.cp.cta_group::2.4x256b.b8x16.b4x16_p64 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.4x256b.b4x16_p64.cg1(ptr addrspace(6) %addr, i64 %sdesc)
+
+ ret void
+}
+
+define void @test_tcgen05_cp_4x256b_b4x16_p64_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_4x256b_b4x16_p64_cg2(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_b4x16_p64_cg2_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_b4x16_p64_cg2_param_1];
+; CHECK-NEXT: tcgen05.cp.cta_group::2.4x256b.b8x16.b4x16_p64 [%r1], %rd1;
+; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.4x256b.b4x16_p64.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
-; CHECK-LABEL: test_tcgen05_cp_128x128b_b4x16_p64
-define void @test_tcgen05_cp_128x128b_b4x16_p64(ptr addrspace(6) %addr, i64 %sdesc) {
-; CHECK-LABEL: test_tcgen05_cp_128x128b_b4x16_p64(
+define void @test_tcgen05_cp_128x128b_b4x16_p64_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_128x128b_b4x16_p64_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_b4x16_p64_param_0];
-; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_b4x16_p64_param_1];
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_b4x16_p64_cg1_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_b4x16_p64_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.128x128b.b8x16.b4x16_p64 [%r1], %rd1;
-; CHECK-NEXT: tcgen05.cp.cta_group::2.128x128b.b8x16.b4x16_p64 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.128x128b.b4x16_p64.cg1(ptr addrspace(6) %addr, i64 %sdesc)
+
+ ret void
+}
+
+define void @test_tcgen05_cp_128x128b_b4x16_p64_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_128x128b_b4x16_p64_cg2(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_b4x16_p64_cg2_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_b4x16_p64_cg2_param_1];
+; CHECK-NEXT: tcgen05.cp.cta_group::2.128x128b.b8x16.b4x16_p64 [%r1], %rd1;
+; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.128x128b.b4x16_p64.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
-; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b4x16_p64
-define void @test_tcgen05_cp_64x128_v1_b4x16_p64(ptr addrspace(6) %addr, i64 %sdesc) {
-; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b4x16_p64(
+define void @test_tcgen05_cp_64x128_v1_b4x16_p64_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b4x16_p64_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_b4x16_p64_param_0];
-; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_b4x16_p64_param_1];
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_b4x16_p64_cg1_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_b4x16_p64_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.64x128b.warpx2::02_13.b8x16.b4x16_p64 [%r1], %rd1;
-; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::02_13.b8x16.b4x16_p64 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_02_13.b4x16_p64.cg1(ptr addrspace(6) %addr, i64 %sdesc)
+
+ ret void
+}
+
+define void @test_tcgen05_cp_64x128_v1_b4x16_p64_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b4x16_p64_cg2(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_b4x16_p64_cg2_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_b4x16_p64_cg2_param_1];
+; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::02_13.b8x16.b4x16_p64 [%r1], %rd1;
+; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_02_13.b4x16_p64.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
-; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b4x16_p64
-define void @test_tcgen05_cp_64x128_v2_b4x16_p64(ptr addrspace(6) %addr, i64 %sdesc) {
-; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b4x16_p64(
+define void @test_tcgen05_cp_64x128_v2_b4x16_p64_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b4x16_p64_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_b4x16_p64_param_0];
-; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_b4x16_p64_param_1];
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_b4x16_p64_cg1_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_b4x16_p64_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.64x128b.warpx2::01_23.b8x16.b4x16_p64 [%r1], %rd1;
-; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::01_23.b8x16.b4x16_p64 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_01_23.b4x16_p64.cg1(ptr addrspace(6) %addr, i64 %sdesc)
+
+ ret void
+}
+
+define void @test_tcgen05_cp_64x128_v2_b4x16_p64_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b4x16_p64_cg2(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_b4x16_p64_cg2_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_b4x16_p64_cg2_param_1];
+; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::01_23.b8x16.b4x16_p64 [%r1], %rd1;
+; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_01_23.b4x16_p64.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
}
-; CHECK-LABEL: test_tcgen05_cp_32x128_b4x16_p64
-define void @test_tcgen05_cp_32x128_b4x16_p64(ptr addrspace(6) %addr, i64 %sdesc) {
-; CHECK-LABEL: test_tcgen05_cp_32x128_b4x16_p64(
+define void @test_tcgen05_cp_32x128_b4x16_p64_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_32x128_b4x16_p64_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_b4x16_p64_param_0];
-; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_b4x16_p64_param_1];
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_b4x16_p64_cg1_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_b4x16_p64_cg1_param_1];
; CHECK-NEXT: tcgen05.cp.cta_group::1.32x128b.warpx4.b8x16.b4x16_p64 [%r1], %rd1;
-; CHECK-NEXT: tcgen05.cp.cta_group::2.32x128b.warpx4.b8x16.b4x16_p64 [%r1], %rd1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.b4x16_p64.cg1(ptr addrspace(6) %addr, i64 %sdesc)
+
+ ret void
+}
+
+define void @test_tcgen05_cp_32x128_b4x16_p64_cg2(ptr addrspace(6) %addr, i64 %sdesc) {
+; CHECK-LABEL: test_tcgen05_cp_32x128_b4x16_p64_cg2(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_b4x16_p64_cg2_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_b4x16_p64_cg2_param_1];
+; CHECK-NEXT: tcgen05.cp.cta_group::2.32x128b.warpx4.b8x16.b4x16_p64 [%r1], %rd1;
+; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.b4x16_p64.cg2(ptr addrspace(6) %addr, i64 %sdesc)
ret void
diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-shift.ll b/llvm/test/CodeGen/NVPTX/tcgen05-shift.ll
index 8ca6a2a0..bf2adac 100644
--- a/llvm/test/CodeGen/NVPTX/tcgen05-shift.ll
+++ b/llvm/test/CodeGen/NVPTX/tcgen05-shift.ll
@@ -7,18 +7,29 @@
declare void @llvm.nvvm.tcgen05.shift.down.cg1(ptr addrspace(6) %tmem_addr)
declare void @llvm.nvvm.tcgen05.shift.down.cg2(ptr addrspace(6) %tmem_addr)
-; CHECK-LABEL: test_tcgen05_shift
-define void @test_tcgen05_shift(ptr addrspace(6) %tmem_addr) {
-; CHECK-LABEL: test_tcgen05_shift(
+define void @test_tcgen05_shift_cg1(ptr addrspace(6) %tmem_addr) {
+; CHECK-LABEL: test_tcgen05_shift_cg1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_shift_param_0];
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_shift_cg1_param_0];
; CHECK-NEXT: tcgen05.shift.cta_group::1.down [%r1];
-; CHECK-NEXT: tcgen05.shift.cta_group::2.down [%r1];
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.shift.down.cg1(ptr addrspace(6) %tmem_addr)
+
+ ret void
+}
+
+define void @test_tcgen05_shift_cg2(ptr addrspace(6) %tmem_addr) {
+; CHECK-LABEL: test_tcgen05_shift_cg2(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_shift_cg2_param_0];
+; CHECK-NEXT: tcgen05.shift.cta_group::2.down [%r1];
+; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.shift.down.cg2(ptr addrspace(6) %tmem_addr)
ret void
diff --git a/llvm/test/CodeGen/RISCV/rv64zbkb.ll b/llvm/test/CodeGen/RISCV/rv64zbkb.ll
index 4537d18..b2ad8d7 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbkb.ll
@@ -441,7 +441,7 @@ define void @pack_lo_packh_hi_packh_2(i8 zeroext %0, i8 zeroext %1, i8 zeroext %
; RV64ZBKB-LABEL: pack_lo_packh_hi_packh_2:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: packh a0, a0, a1
-; RV64ZBKB-NEXT: packh a1, a3, a2
+; RV64ZBKB-NEXT: packh a1, a2, a3
; RV64ZBKB-NEXT: packw a0, a0, a1
; RV64ZBKB-NEXT: sw a0, 0(a4)
; RV64ZBKB-NEXT: ret
@@ -477,7 +477,7 @@ define void @pack_lo_packh_hi_packh_3(i8 %0, i8 %1, i8 %2, i8 %3, ptr %p) nounwi
; RV64ZBKB-LABEL: pack_lo_packh_hi_packh_3:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: packh a0, a0, a1
-; RV64ZBKB-NEXT: packh a1, a3, a2
+; RV64ZBKB-NEXT: packh a1, a2, a3
; RV64ZBKB-NEXT: packw a0, a0, a1
; RV64ZBKB-NEXT: sw a0, 0(a4)
; RV64ZBKB-NEXT: ret
@@ -509,7 +509,7 @@ define i32 @pack_lo_packh_hi_packh_4(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2
; RV64ZBKB-LABEL: pack_lo_packh_hi_packh_4:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: packh a0, a0, a1
-; RV64ZBKB-NEXT: packh a1, a3, a2
+; RV64ZBKB-NEXT: packh a1, a2, a3
; RV64ZBKB-NEXT: packw a0, a0, a1
; RV64ZBKB-NEXT: ret
%a = zext i8 %0 to i32
diff --git a/llvm/test/Instrumentation/AddressSanitizer/alloca-offset-lifetime.ll b/llvm/test/Instrumentation/AddressSanitizer/alloca-offset-lifetime.ll
deleted file mode 100644
index a4846176..0000000
--- a/llvm/test/Instrumentation/AddressSanitizer/alloca-offset-lifetime.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; Test that ASAN will not instrument lifetime markers on alloca offsets.
-;
-; RUN: opt < %s -passes=asan --asan-use-after-scope -S | FileCheck %s
-
-target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.15.0"
-
-%t = type { ptr, ptr, %sub, i64 }
-%sub = type { i32 }
-
-define void @foo() sanitize_address {
-entry:
- %0 = alloca %t, align 8
- %x = getelementptr inbounds %t, ptr %0, i64 0, i32 2
- call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x)
- call void @bar(ptr nonnull %x)
- call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x) #3
- ret void
-}
-
-declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
-declare void @bar(ptr)
-declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
-
-; CHECK: store i64 %[[STACK_BASE:.+]], ptr %asan_local_stack_base, align 8
-; CHECK-NOT: store i8 0
-; CHECK: call void @bar(ptr nonnull %x)
diff --git a/llvm/test/Instrumentation/AddressSanitizer/calls-only-smallfn.ll b/llvm/test/Instrumentation/AddressSanitizer/calls-only-smallfn.ll
index 0859a7e..d7204e6 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/calls-only-smallfn.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/calls-only-smallfn.ll
@@ -9,15 +9,15 @@ define void @foo() #0 {
entry:
%array01 = alloca [1 x i8], align 1
%array02 = alloca [2 x i8], align 1
-; OUTLINE: call void @__asan_set_shadow_f1(i64 %23, i64 4)
-; OUTLINE: call void @__asan_set_shadow_01(i64 %24, i64 1)
-; OUTLINE: call void @__asan_set_shadow_f2(i64 %25, i64 1)
-; OUTLINE: call void @__asan_set_shadow_02(i64 %26, i64 1)
-; OUTLINE: call void @__asan_set_shadow_f3(i64 %27, i64 1)
-; OUTLINE: call void @__asan_stack_free_0(i64 %7, i64 64)
-; OUTLINE: call void @__asan_set_shadow_00(i64 %55, i64 8)
-; INLINE: store i64 -935919682371587599, ptr %24, align 1
-; INLINE: store i64 -723401728380766731, ptr %52, align 1
+; OUTLINE: call void @__asan_set_shadow_f1(i64 %{{.+}}, i64 4)
+; OUTLINE: call void @__asan_set_shadow_01(i64 %{{.+}}, i64 1)
+; OUTLINE: call void @__asan_set_shadow_f2(i64 %{{.+}}, i64 1)
+; OUTLINE: call void @__asan_set_shadow_02(i64 %{{.+}}, i64 1)
+; OUTLINE: call void @__asan_set_shadow_f3(i64 %{{.+}}, i64 1)
+; OUTLINE: call void @__asan_stack_free_0(i64 %{{.+}}, i64 64)
+; OUTLINE: call void @__asan_set_shadow_00(i64 %{{.+}}, i64 8)
+; INLINE: store i64 -935919682371587599, ptr %{{.+}}, align 1
+; INLINE: store i64 -723401728380766731, ptr %{{.+}}, align 1
%arrayidx = getelementptr inbounds [1 x i8], ptr %array01, i64 0, i64 1
store i8 1, ptr %arrayidx, align 1
%arrayidx1 = getelementptr inbounds [2 x i8], ptr %array02, i64 0, i64 2
diff --git a/llvm/test/Instrumentation/AddressSanitizer/calls-only.ll b/llvm/test/Instrumentation/AddressSanitizer/calls-only.ll
index 5f122ad..6f52289 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/calls-only.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/calls-only.ll
@@ -14,26 +14,26 @@ entry:
%array05 = alloca [5 x i8], align 1
%array06 = alloca [6 x i8], align 1
%array07 = alloca [7 x i8], align 1
-; OUTLINE: call void @__asan_set_shadow_f1(i64 %33, i64 4)
-; OUTLINE: call void @__asan_set_shadow_01(i64 %34, i64 1)
-; OUTLINE: call void @__asan_set_shadow_f2(i64 %35, i64 1)
-; OUTLINE: call void @__asan_set_shadow_02(i64 %36, i64 1)
-; OUTLINE: call void @__asan_set_shadow_f2(i64 %37, i64 1)
-; OUTLINE: call void @__asan_set_shadow_03(i64 %38, i64 1)
-; OUTLINE: call void @__asan_set_shadow_f2(i64 %39, i64 1)
-; OUTLINE: call void @__asan_set_shadow_04(i64 %40, i64 1)
-; OUTLINE: call void @__asan_set_shadow_f2(i64 %41, i64 1)
-; OUTLINE: call void @__asan_set_shadow_05(i64 %42, i64 1)
-; OUTLINE: call void @__asan_set_shadow_f2(i64 %43, i64 3)
-; OUTLINE: call void @__asan_set_shadow_06(i64 %44, i64 1)
-; OUTLINE: call void @__asan_set_shadow_f2(i64 %45, i64 3)
-; OUTLINE: call void @__asan_set_shadow_07(i64 %46, i64 1)
-; OUTLINE: call void @__asan_set_shadow_f3(i64 %47, i64 3)
-; OUTLINE: call void @__asan_stack_free_2(i64 %7, i64 192)
-; OUTLINE: call void @__asan_set_shadow_00(i64 %135, i64 24)
-; INLINE: store i64 -1007977276409515535, ptr %34, align 1
-; INLINE: store i64 -940423264817843709, ptr %36, align 1
-; INLINE: store i64 -868083087686045178, ptr %38, align 1
+; OUTLINE: call void @__asan_set_shadow_f1(i64 %{{.+}}, i64 4)
+; OUTLINE: call void @__asan_set_shadow_01(i64 %{{.+}}, i64 1)
+; OUTLINE: call void @__asan_set_shadow_f2(i64 %{{.+}}, i64 1)
+; OUTLINE: call void @__asan_set_shadow_02(i64 %{{.+}}, i64 1)
+; OUTLINE: call void @__asan_set_shadow_f2(i64 %{{.+}}, i64 1)
+; OUTLINE: call void @__asan_set_shadow_03(i64 %{{.+}}, i64 1)
+; OUTLINE: call void @__asan_set_shadow_f2(i64 %{{.+}}, i64 1)
+; OUTLINE: call void @__asan_set_shadow_04(i64 %{{.+}}, i64 1)
+; OUTLINE: call void @__asan_set_shadow_f2(i64 %{{.+}}, i64 1)
+; OUTLINE: call void @__asan_set_shadow_05(i64 %{{.+}}, i64 1)
+; OUTLINE: call void @__asan_set_shadow_f2(i64 %{{.+}}, i64 3)
+; OUTLINE: call void @__asan_set_shadow_06(i64 %{{.+}}, i64 1)
+; OUTLINE: call void @__asan_set_shadow_f2(i64 %{{.+}}, i64 3)
+; OUTLINE: call void @__asan_set_shadow_07(i64 %{{.+}}, i64 1)
+; OUTLINE: call void @__asan_set_shadow_f3(i64 %{{.+}}, i64 3)
+; OUTLINE: call void @__asan_stack_free_2(i64 %{{.+}}, i64 192)
+; OUTLINE: call void @__asan_set_shadow_00(i64 %{{.+}}, i64 24)
+; INLINE: store i64 -1007977276409515535, ptr %{{.+}}, align 1
+; INLINE: store i64 -940423264817843709, ptr %{{.+}}, align 1
+; INLINE: store i64 -868083087686045178, ptr %{{.+}}, align 1
%arrayidx = getelementptr inbounds [1 x i8], ptr %array01, i64 0, i64 1
store i8 1, ptr %arrayidx, align 1
%arrayidx1 = getelementptr inbounds [2 x i8], ptr %array02, i64 0, i64 2
@@ -48,7 +48,7 @@ entry:
store i8 6, ptr %arrayidx5, align 1
%arrayidx6 = getelementptr inbounds [7 x i8], ptr %array07, i64 0, i64 7
store i8 7, ptr %arrayidx6, align 1
-; CHECK-NOT: store i64 -723401728380766731, ptr %126, align 1
+; CHECK-NOT: store i64 -723401728380766731, ptr %{{.+}}, align 1
ret void
}
attributes #0 = { noinline nounwind optnone sanitize_address ssp uwtable(sync) "frame-pointer"="non-leaf" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+crypto,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a" }
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/missing_dbg.ll b/llvm/test/Instrumentation/SanitizerCoverage/missing_dbg.ll
index 3568434..07b9a1c 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/missing_dbg.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/missing_dbg.ll
@@ -1,5 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes='module(sancov-module)' -sanitizer-coverage-level=2 -S | FileCheck %s
+; RUN: opt < %s -passes='module(sancov-module)' -sanitizer-coverage-level=1 -sanitizer-coverage-stack-depth -sanitizer-coverage-stack-depth-callback-min=1 -S | FileCheck %s --check-prefix=CHECK-STACK-CALLBACK
+; RUN: opt < %s -passes='module(sancov-module)' -sanitizer-coverage-level=1 -sanitizer-coverage-stack-depth -S | FileCheck %s --check-prefix=CHECK-STACK-DEPTH
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -55,6 +57,86 @@ entry:
ret i32 %t
}
+define i32 @with_dbg_stack_callback(ptr %a) !dbg !8 {
+; CHECK-STACK-CALLBACK-LABEL: define i32 @with_dbg_stack_callback(
+; CHECK-STACK-CALLBACK-SAME: ptr [[A:%.*]]) !dbg [[DBG8:![0-9]+]] {
+; CHECK-STACK-CALLBACK-NEXT: entry:
+; CHECK-STACK-CALLBACK-NEXT: [[BUF:%.*]] = alloca [64 x i8], align 1
+; CHECK-STACK-CALLBACK-NEXT: call void @__sanitizer_cov_stack_depth() #[[ATTR1:[0-9]+]], !dbg [[DBG9:![0-9]+]]
+; CHECK-STACK-CALLBACK-NEXT: %t = load i32, ptr [[A]], align 4
+; CHECK-STACK-CALLBACK-NEXT: call void @external_func()
+; CHECK-STACK-CALLBACK-NEXT: ret i32 %t
+;
+entry:
+ %buf = alloca [64 x i8], align 1
+ %t = load i32, ptr %a, align 4
+ call void @external_func()
+ ret i32 %t
+}
+
+define i32 @with_dbg_stack_depth(ptr %a) !dbg !10 {
+; CHECK-STACK-DEPTH-LABEL: define i32 @with_dbg_stack_depth(
+; CHECK-STACK-DEPTH-SAME: ptr [[A:%.*]]) !dbg [[DBG10:![0-9]+]] {
+; CHECK-STACK-DEPTH-NEXT: entry:
+; CHECK-STACK-DEPTH-NEXT: [[BUF:%.*]] = alloca [64 x i8], align 1
+; CHECK-STACK-DEPTH-NEXT: [[TMP1:%.*]] = call ptr @llvm.frameaddress.p0(i32 0)
+; CHECK-STACK-DEPTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
+; CHECK-STACK-DEPTH-NEXT: [[TMP3:%.*]] = load i64, ptr @__sancov_lowest_stack, align 8
+; CHECK-STACK-DEPTH-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP2]], [[TMP3]]
+; CHECK-STACK-DEPTH-NEXT: br i1 [[TMP4]], label {{%.*}}, label {{%.*}}
+; CHECK-STACK-DEPTH: store i64 [[TMP2]], ptr @__sancov_lowest_stack, align 8, !dbg [[DBG11:![0-9]+]], {{.*}}!nosanitize
+; CHECK-STACK-DEPTH: %t = load i32, ptr [[A]], align 4
+; CHECK-STACK-DEPTH-NEXT: call void @external_func()
+; CHECK-STACK-DEPTH-NEXT: ret i32 %t
+;
+entry:
+ %buf = alloca [64 x i8], align 1
+ %t = load i32, ptr %a, align 4
+ call void @external_func()
+ ret i32 %t
+}
+
+define i32 @without_dbg_stack_callback(ptr %a) {
+; CHECK-STACK-CALLBACK-LABEL: define i32 @without_dbg_stack_callback(
+; CHECK-STACK-CALLBACK-SAME: ptr [[A:%.*]]) {
+; CHECK-STACK-CALLBACK-NEXT: entry:
+; CHECK-STACK-CALLBACK-NEXT: [[BUF:%.*]] = alloca [64 x i8], align 1
+; CHECK-STACK-CALLBACK-NEXT: call void @__sanitizer_cov_stack_depth() #[[ATTR1]]
+; CHECK-STACK-CALLBACK-NEXT: %t = load i32, ptr [[A]], align 4
+; CHECK-STACK-CALLBACK-NEXT: call void @external_func()
+; CHECK-STACK-CALLBACK-NEXT: ret i32 %t
+;
+entry:
+ %buf = alloca [64 x i8], align 1
+ %t = load i32, ptr %a, align 4
+ call void @external_func()
+ ret i32 %t
+}
+
+define i32 @without_dbg_stack_depth(ptr %a) {
+; CHECK-STACK-DEPTH-LABEL: define i32 @without_dbg_stack_depth(
+; CHECK-STACK-DEPTH-SAME: ptr [[A:%.*]]) {
+; CHECK-STACK-DEPTH-NEXT: entry:
+; CHECK-STACK-DEPTH-NEXT: [[BUF:%.*]] = alloca [64 x i8], align 1
+; CHECK-STACK-DEPTH-NEXT: [[TMP1:%.*]] = call ptr @llvm.frameaddress.p0(i32 0)
+; CHECK-STACK-DEPTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
+; CHECK-STACK-DEPTH-NEXT: [[TMP3:%.*]] = load i64, ptr @__sancov_lowest_stack, align 8
+; CHECK-STACK-DEPTH-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP2]], [[TMP3]]
+; CHECK-STACK-DEPTH-NEXT: br i1 [[TMP4]], label {{%.*}}, label {{%.*}}
+; CHECK-STACK-DEPTH: store i64 [[TMP2]], ptr @__sancov_lowest_stack, align 8, {{.*}}!nosanitize
+; CHECK-STACK-DEPTH: %t = load i32, ptr [[A]], align 4
+; CHECK-STACK-DEPTH-NEXT: call void @external_func()
+; CHECK-STACK-DEPTH-NEXT: ret i32 %t
+;
+entry:
+ %buf = alloca [64 x i8], align 1
+ %t = load i32, ptr %a, align 4
+ call void @external_func()
+ ret i32 %t
+}
+
+declare void @external_func()
+
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2}
@@ -66,6 +148,10 @@ entry:
!5 = !{}
!6 = !DILocation(line: 192, scope: !3)
!7 = !DILocation(line: 0, scope: !3)
+!8 = distinct !DISubprogram(name: "with_dbg_stack_callback", scope: !1, file: !1, line: 200, type: !4, scopeLine: 200, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!9 = !DILocation(line: 200, scope: !8)
+!10 = distinct !DISubprogram(name: "with_dbg_stack_depth", scope: !1, file: !1, line: 210, type: !4, scopeLine: 210, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!11 = !DILocation(line: 210, scope: !10)
;.
; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C89, file: [[META1:![0-9]+]], isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, nameTableKind: None)
@@ -76,3 +162,9 @@ entry:
; CHECK: [[DBG6]] = !DILocation(line: 192, scope: [[DBG3]])
; CHECK: [[DBG7]] = !DILocation(line: 0, scope: [[DBG3]])
;.
+; CHECK-STACK-CALLBACK: [[DBG8]] = distinct !DISubprogram(name: "with_dbg_stack_callback", scope: {{.*}}, file: {{.*}}, line: 200
+; CHECK-STACK-CALLBACK: [[DBG9]] = !DILocation(line: 200, scope: [[DBG8]])
+;.
+; CHECK-STACK-DEPTH: [[DBG10]] = distinct !DISubprogram(name: "with_dbg_stack_depth", scope: {{.*}}, file: {{.*}}, line: 210
+; CHECK-STACK-DEPTH: [[DBG11]] = !DILocation(line: 210, scope: [[DBG10]])
+;.
diff --git a/llvm/test/Transforms/Coroutines/coro-transform-must-elide.ll b/llvm/test/Transforms/Coroutines/coro-elide-safe.ll
index 4eec7ed..722693d 100644
--- a/llvm/test/Transforms/Coroutines/coro-transform-must-elide.ll
+++ b/llvm/test/Transforms/Coroutines/coro-elide-safe.ll
@@ -1,4 +1,8 @@
-; Testing elide performed its job for calls to coroutines marked safe.
+; Coroutine calls marked with `coro_elide_safe` should be elided.
+; Inside `caller`, we expect the `callee` coroutine to be elided.
+; Inside `caller_conditional`, `callee` is only called on an unlikely
+; path, hence we expect the `callee` coroutine NOT to be elided.
+;
; RUN: opt < %s -S -passes='cgscc(coro-annotation-elide)' | FileCheck %s
%struct.Task = type { ptr }
@@ -57,7 +61,7 @@ define ptr @callee.noalloc(i8 %arg, ptr dereferenceable(32) align(8) %frame) {
; Function Attrs: presplitcoroutine
define ptr @caller() #0 {
entry:
- %task = call ptr @callee(i8 0) #1
+ %task = call ptr @callee(i8 0) coro_elide_safe
ret ptr %task
; CHECK: %[[TASK:.+]] = alloca %struct.Task, align 8
; CHECK-NEXT: %[[FRAME:.+]] = alloca [32 x i8], align 8
@@ -69,6 +73,25 @@ entry:
; CHECK-NEXT: ret ptr %[[TASK]]
}
+; CHECK-LABEL: define ptr @caller_conditional(i1 %cond)
+; Function Attrs: presplitcoroutine
+define ptr @caller_conditional(i1 %cond) #0 {
+entry:
+ br i1 %cond, label %call, label %ret
+
+call:
+ ; CHECK-NOT: alloca
+ ; CHECK-NOT: @llvm.coro.id({{.*}}, ptr @callee, {{.*}})
+ ; CHECK: %task = call ptr @callee(i8 0)
+ ; CHECK-NEXT: br label %ret
+ %task = call ptr @callee(i8 0) coro_elide_safe
+ br label %ret
+
+ret:
+ %retval = phi ptr [ %task, %call ], [ null, %entry ]
+ ret ptr %retval
+}
+
declare token @llvm.coro.id(i32, ptr, ptr, ptr)
declare ptr @llvm.coro.begin(token, ptr)
declare ptr @llvm.coro.frame()
@@ -76,4 +99,3 @@ declare ptr @llvm.coro.subfn.addr(ptr, i8)
declare i1 @llvm.coro.alloc(token)
attributes #0 = { presplitcoroutine }
-attributes #1 = { coro_elide_safe }
diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll
index 4173c32..f45798b 100644
--- a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll
+++ b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll
@@ -7,10 +7,10 @@
; state, and the block that determines the next state.
; < path of BBs that form a cycle > [ state, determinator ]
define i32 @test1(i32 %num) !prof !0{
-; CHECK: < case2 for.inc for.body > [ 1, for.inc ]
-; CHECK-NEXT: < for.inc for.body > [ 1, for.inc ]
-; CHECK-NEXT: < case1 for.inc for.body > [ 2, for.inc ]
-; CHECK-NEXT: < case2 sel.si.unfold.false for.inc for.body > [ 2, sel.si.unfold.false ]
+; CHECK: < case2, for.inc, for.body > [ 1, for.inc ]
+; CHECK-NEXT: < for.inc, for.body > [ 1, for.inc ]
+; CHECK-NEXT: < case1, for.inc, for.body > [ 2, for.inc ]
+; CHECK-NEXT: < case2, sel.si.unfold.false, for.inc, for.body > [ 2, sel.si.unfold.false ]
entry:
br label %for.body
@@ -47,12 +47,12 @@ for.end:
; complicated CFG. Here the FSM is represented as a nested loop, with
; fallthrough cases.
define i32 @test2(i32 %init) {
-; CHECK: < loop.1.backedge loop.1 loop.2 loop.3 > [ 1, loop.1 ]
-; CHECK-NEXT: < case4 loop.1.backedge state.1.be2.si.unfold.false loop.1 loop.2 loop.3 > [ 2, loop.1.backedge ]
-; CHECK-NEXT: < case2 loop.1.backedge state.1.be2.si.unfold.false loop.1 loop.2 loop.3 > [ 4, loop.1.backedge ]
-; CHECK-NEXT: < case4 loop.2.backedge loop.2 loop.3 > [ 3, loop.2.backedge ]
-; CHECK-NEXT: < case3 loop.2.backedge loop.2 loop.3 > [ 0, loop.2.backedge ]
-; CHECK-NEXT: < case2 loop.3 > [ 3, loop.3 ]
+; CHECK: < loop.1.backedge, loop.1, loop.2, loop.3 > [ 1, loop.1 ]
+; CHECK-NEXT: < case4, loop.1.backedge, state.1.be2.si.unfold.false, loop.1, loop.2, loop.3 > [ 2, loop.1.backedge ]
+; CHECK-NEXT: < case2, loop.1.backedge, state.1.be2.si.unfold.false, loop.1, loop.2, loop.3 > [ 4, loop.1.backedge ]
+; CHECK-NEXT: < case4, loop.2.backedge, loop.2, loop.3 > [ 3, loop.2.backedge ]
+; CHECK-NEXT: < case3, loop.2.backedge, loop.2, loop.3 > [ 0, loop.2.backedge ]
+; CHECK-NEXT: < case2, loop.3 > [ 3, loop.3 ]
entry:
%cmp = icmp eq i32 %init, 0
%sel = select i1 %cmp, i32 0, i32 2
@@ -187,12 +187,12 @@ bb66: ; preds = %bb59
; Value %init is not predictable but it's okay since it is the value initial to the switch.
define i32 @initial.value.positive1(i32 %init) !prof !0 {
-; CHECK: < loop.1.backedge loop.1 loop.2 loop.3 > [ 1, loop.1 ]
-; CHECK-NEXT: < case4 loop.1.backedge state.1.be2.si.unfold.false loop.1 loop.2 loop.3 > [ 2, loop.1.backedge ]
-; CHECK-NEXT: < case2 loop.1.backedge state.1.be2.si.unfold.false loop.1 loop.2 loop.3 > [ 4, loop.1.backedge ]
-; CHECK-NEXT: < case4 loop.2.backedge loop.2 loop.3 > [ 3, loop.2.backedge ]
-; CHECK-NEXT: < case3 loop.2.backedge loop.2 loop.3 > [ 0, loop.2.backedge ]
-; CHECK-NEXT: < case2 loop.3 > [ 3, loop.3 ]
+; CHECK: < loop.1.backedge, loop.1, loop.2, loop.3 > [ 1, loop.1 ]
+; CHECK-NEXT: < case4, loop.1.backedge, state.1.be2.si.unfold.false, loop.1, loop.2, loop.3 > [ 2, loop.1.backedge ]
+; CHECK-NEXT: < case2, loop.1.backedge, state.1.be2.si.unfold.false, loop.1, loop.2, loop.3 > [ 4, loop.1.backedge ]
+; CHECK-NEXT: < case4, loop.2.backedge, loop.2, loop.3 > [ 3, loop.2.backedge ]
+; CHECK-NEXT: < case3, loop.2.backedge, loop.2, loop.3 > [ 0, loop.2.backedge ]
+; CHECK-NEXT: < case2, loop.3 > [ 3, loop.3 ]
entry:
%cmp = icmp eq i32 %init, 0
br label %loop.1
diff --git a/llvm/test/Transforms/DFAJumpThreading/max-path-length.ll b/llvm/test/Transforms/DFAJumpThreading/max-path-length.ll
index 92747629..cb7c46e 100644
--- a/llvm/test/Transforms/DFAJumpThreading/max-path-length.ll
+++ b/llvm/test/Transforms/DFAJumpThreading/max-path-length.ll
@@ -9,9 +9,9 @@
; too long so that it is not jump-threaded.
define i32 @max_path_length(i32 %num) {
; CHECK-NOT: 3, case1
-; CHECK: < case2 for.inc for.body > [ 1, for.inc ]
-; CHECK-NEXT: < for.inc for.body > [ 1, for.inc ]
-; CHECK-NEXT: < case2 sel.si.unfold.false for.inc for.body > [ 2, sel.si.unfold.false ]
+; CHECK: < case2, for.inc, for.body > [ 1, for.inc ]
+; CHECK-NEXT: < for.inc, for.body > [ 1, for.inc ]
+; CHECK-NEXT: < case2, sel.si.unfold.false, for.inc, for.body > [ 2, sel.si.unfold.false ]
; CHECK-NEXT: DFA-JT: Renaming non-local uses of:
entry:
br label %for.body
diff --git a/llvm/test/Transforms/GVN/assume-equal.ll b/llvm/test/Transforms/GVN/assume-equal.ll
index 0c922da..bbbc5c5 100644
--- a/llvm/test/Transforms/GVN/assume-equal.ll
+++ b/llvm/test/Transforms/GVN/assume-equal.ll
@@ -221,21 +221,22 @@ define i32 @_Z1ii(i32 %p) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[P]], 42
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
-; CHECK-NEXT: br i1 true, label %[[BB2:.*]], label %[[BB2]]
-; CHECK: [[BB2]]:
-; CHECK-NEXT: br i1 true, label %[[BB2]], label %[[BB2]]
-; CHECK: [[BB0:.*:]]
+; CHECK-NEXT: br i1 true, label %[[COMMON:.*]], label %[[COMMON]]
+; CHECK: [[COMMON]]:
+; CHECK-NEXT: br i1 true, label %[[COMMON]], label %[[COMMON]]
+; CHECK: [[EXIT:.*:]]
; CHECK-NEXT: ret i32 42
;
entry:
%cmp = icmp eq i32 %p, 42
call void @llvm.assume(i1 %cmp)
- br i1 %cmp, label %bb2, label %bb2
-bb2:
+ br i1 %cmp, label %common, label %common
+common:
call void @llvm.assume(i1 true)
- br i1 %cmp, label %bb2, label %bb2
+ br i1 %cmp, label %common, label %common
+exit:
ret i32 %p
}
@@ -357,8 +358,8 @@ define i8 @assume_ptr_eq_different_prov_matters(ptr %p, ptr %p2) {
ret i8 %v
}
-define i1 @assume_ptr_eq_different_prov_does_not_matter(ptr %p, ptr %p2) {
-; CHECK-LABEL: define i1 @assume_ptr_eq_different_prov_does_not_matter(
+define i1 @assume_ptr_eq_different_prov_does_not_matter_icmp(ptr %p, ptr %p2) {
+; CHECK-LABEL: define i1 @assume_ptr_eq_different_prov_does_not_matter_icmp(
; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[P]], [[P2]]
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
@@ -371,6 +372,36 @@ define i1 @assume_ptr_eq_different_prov_does_not_matter(ptr %p, ptr %p2) {
ret i1 %c
}
+; This is not correct, as it may change the provenance exposed by ptrtoint.
+; We still allow it for now.
+define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoint(ptr %p, ptr %p2) {
+; CHECK-LABEL: define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoint(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[P]], [[P2]]
+; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: [[INT:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT: ret i64 [[INT]]
+;
+ %cmp = icmp eq ptr %p, %p2
+ call void @llvm.assume(i1 %cmp)
+ %int = ptrtoint ptr %p2 to i64
+ ret i64 %int
+}
+
+define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoaddr(ptr %p, ptr %p2) {
+; CHECK-LABEL: define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoaddr(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[P]], [[P2]]
+; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: [[INT:%.*]] = ptrtoaddr ptr [[P]] to i64
+; CHECK-NEXT: ret i64 [[INT]]
+;
+ %cmp = icmp eq ptr %p, %p2
+ call void @llvm.assume(i1 %cmp)
+ %int = ptrtoaddr ptr %p2 to i64
+ ret i64 %int
+}
+
define i8 @assume_ptr_eq_same_prov(ptr %p, i64 %x) {
; CHECK-LABEL: define i8 @assume_ptr_eq_same_prov(
; CHECK-SAME: ptr [[P:%.*]], i64 [[X:%.*]]) {
diff --git a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll
index 61b1331..5211fbd 100644
--- a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll
+++ b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll
@@ -1,6 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
-target datalayout = "p1:64:64:64:32"
+
+; The ptrtoaddr folds are also valid for pointers that have external state.
+target datalayout = "pe1:64:64:64:32"
+
+@g = external global i8
+@g2 = external global i8
+
+@g.as1 = external addrspace(1) global i8
+@g2.as1 = external addrspace(1) global i8
define i32 @ptrtoaddr_inttoptr_arg(i32 %a) {
; CHECK-LABEL: define i32 @ptrtoaddr_inttoptr_arg(
@@ -24,14 +32,14 @@ define i32 @ptrtoaddr_inttoptr() {
define i32 @ptrtoaddr_inttoptr_diff_size1() {
; CHECK-LABEL: define i32 @ptrtoaddr_inttoptr_diff_size1() {
-; CHECK-NEXT: ret i32 ptrtoaddr (ptr addrspace(1) inttoptr (i64 -1 to ptr addrspace(1)) to i32)
+; CHECK-NEXT: ret i32 -1
;
ret i32 ptrtoaddr (ptr addrspace(1) inttoptr (i64 -1 to ptr addrspace(1)) to i32)
}
define i32 @ptrtoaddr_inttoptr_diff_size2() {
; CHECK-LABEL: define i32 @ptrtoaddr_inttoptr_diff_size2() {
-; CHECK-NEXT: ret i32 ptrtoaddr (ptr addrspace(1) inttoptr (i16 -1 to ptr addrspace(1)) to i32)
+; CHECK-NEXT: ret i32 65535
;
ret i32 ptrtoaddr (ptr addrspace(1) inttoptr (i16 -1 to ptr addrspace(1)) to i32)
}
@@ -52,14 +60,73 @@ define i64 @ptr2addr2_inttoptr_noas2() {
define i64 @ptrtoaddr_inttoptr_noas_diff_size1() {
; CHECK-LABEL: define i64 @ptrtoaddr_inttoptr_noas_diff_size1() {
-; CHECK-NEXT: ret i64 ptrtoaddr (ptr inttoptr (i32 -1 to ptr) to i64)
+; CHECK-NEXT: ret i64 4294967295
;
ret i64 ptrtoaddr (ptr inttoptr (i32 -1 to ptr) to i64)
}
define i64 @ptrtoaddr_inttoptr_noas_diff_size2() {
; CHECK-LABEL: define i64 @ptrtoaddr_inttoptr_noas_diff_size2() {
-; CHECK-NEXT: ret i64 ptrtoaddr (ptr inttoptr (i128 -1 to ptr) to i64)
+; CHECK-NEXT: ret i64 -1
;
ret i64 ptrtoaddr (ptr inttoptr (i128 -1 to ptr) to i64)
}
+
+define i64 @ptrtoaddr_gep_null() {
+; CHECK-LABEL: define i64 @ptrtoaddr_gep_null() {
+; CHECK-NEXT: ret i64 42
+;
+ ret i64 ptrtoaddr (ptr getelementptr (i8, ptr null, i64 42) to i64)
+}
+
+define i32 @ptrtoaddr_gep_null_addrsize() {
+; CHECK-LABEL: define i32 @ptrtoaddr_gep_null_addrsize() {
+; CHECK-NEXT: ret i32 42
+;
+ ret i32 ptrtoaddr (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) null, i32 42) to i32)
+}
+
+define i64 @ptrtoaddr_gep_sub() {
+; CHECK-LABEL: define i64 @ptrtoaddr_gep_sub() {
+; CHECK-NEXT: ret i64 sub (i64 ptrtoaddr (ptr @g to i64), i64 ptrtoaddr (ptr @g2 to i64))
+;
+ ret i64 ptrtoaddr (ptr getelementptr (i8, ptr @g, i64 sub (i64 0, i64 ptrtoaddr (ptr @g2 to i64))) to i64)
+}
+
+define i32 @ptrtoaddr_gep_sub_addrsize() {
+; CHECK-LABEL: define i32 @ptrtoaddr_gep_sub_addrsize() {
+; CHECK-NEXT: ret i32 sub (i32 ptrtoaddr (ptr addrspace(1) @g.as1 to i32), i32 ptrtoaddr (ptr addrspace(1) @g2.as1 to i32))
+;
+ ret i32 ptrtoaddr (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) @g.as1, i32 sub (i32 0, i32 ptrtoaddr (ptr addrspace(1) @g2.as1 to i32))) to i32)
+}
+
+; Don't fold inttoptr of ptrtoaddr away. inttoptr will pick a previously
+; exposed provenance, which is not necessarily that of @g (especially as
+; ptrtoaddr does not expose the provenance.)
+define ptr @inttoptr_of_ptrtoaddr() {
+; CHECK-LABEL: define ptr @inttoptr_of_ptrtoaddr() {
+; CHECK-NEXT: ret ptr inttoptr (i64 ptrtoaddr (ptr @g to i64) to ptr)
+;
+ ret ptr inttoptr (i64 ptrtoaddr (ptr @g to i64) to ptr)
+}
+
+define i64 @ptrtoaddr_sub_consts_unrelated() {
+; CHECK-LABEL: define i64 @ptrtoaddr_sub_consts_unrelated() {
+; CHECK-NEXT: ret i64 sub (i64 ptrtoaddr (ptr @g to i64), i64 ptrtoaddr (ptr @g2 to i64))
+;
+ ret i64 sub (i64 ptrtoaddr (ptr @g to i64), i64 ptrtoaddr (ptr @g2 to i64))
+}
+
+define i64 @ptrtoaddr_sub_consts_offset() {
+; CHECK-LABEL: define i64 @ptrtoaddr_sub_consts_offset() {
+; CHECK-NEXT: ret i64 42
+;
+ ret i64 sub (i64 ptrtoaddr (ptr getelementptr (i8, ptr @g, i64 42) to i64), i64 ptrtoaddr (ptr @g to i64))
+}
+
+define i32 @ptrtoaddr_sub_consts_offset_addrsize() {
+; CHECK-LABEL: define i32 @ptrtoaddr_sub_consts_offset_addrsize() {
+; CHECK-NEXT: ret i32 42
+;
+ ret i32 sub (i32 ptrtoaddr (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) @g.as1, i32 42) to i32), i32 ptrtoaddr (ptr addrspace(1) @g.as1 to i32))
+}
diff --git a/llvm/test/Transforms/InstSimplify/ptr_diff.ll b/llvm/test/Transforms/InstSimplify/ptr_diff.ll
index d18b462..fdd9e8e 100644
--- a/llvm/test/Transforms/InstSimplify/ptr_diff.ll
+++ b/llvm/test/Transforms/InstSimplify/ptr_diff.ll
@@ -1,11 +1,9 @@
-; NOTE: Assertions have been autogenerated by update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instsimplify -S | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-define i64 @ptrdiff1(ptr %ptr) {
-; CHECK-LABEL: @ptrdiff1(
-; CHECK: ret i64 42
+define i64 @ptrdiff(ptr %ptr) {
+; CHECK-LABEL: @ptrdiff(
+; CHECK-NEXT: ret i64 42
;
%last = getelementptr inbounds i8, ptr %ptr, i32 42
%first.int = ptrtoint ptr %ptr to i64
@@ -14,9 +12,24 @@ define i64 @ptrdiff1(ptr %ptr) {
ret i64 %diff
}
-define i64 @ptrdiff2(ptr %ptr) {
-; CHECK-LABEL: @ptrdiff2(
-; CHECK: ret i64 42
+define i64 @ptrdiff_no_inbounds(ptr %ptr) {
+; CHECK-LABEL: @ptrdiff_no_inbounds(
+; CHECK-NEXT: [[LAST:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i32 42
+; CHECK-NEXT: [[FIRST_INT:%.*]] = ptrtoint ptr [[PTR]] to i64
+; CHECK-NEXT: [[LAST_INT:%.*]] = ptrtoint ptr [[LAST]] to i64
+; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[LAST_INT]], [[FIRST_INT]]
+; CHECK-NEXT: ret i64 [[DIFF]]
+;
+ %last = getelementptr i8, ptr %ptr, i32 42
+ %first.int = ptrtoint ptr %ptr to i64
+ %last.int = ptrtoint ptr %last to i64
+ %diff = sub i64 %last.int, %first.int
+ ret i64 %diff
+}
+
+define i64 @ptrdiff_chain(ptr %ptr) {
+; CHECK-LABEL: @ptrdiff_chain(
+; CHECK-NEXT: ret i64 42
;
%first2 = getelementptr inbounds i8, ptr %ptr, i32 1
%first3 = getelementptr inbounds i8, ptr %first2, i32 2
@@ -31,26 +44,10 @@ define i64 @ptrdiff2(ptr %ptr) {
ret i64 %diff
}
-define i64 @ptrdiff3(ptr %ptr) {
-; Don't bother with non-inbounds GEPs.
-; CHECK-LABEL: @ptrdiff3(
-; CHECK: [[LAST:%.*]] = getelementptr i8, ptr %ptr, i32 42
-; CHECK-NEXT: [[FIRST_INT:%.*]] = ptrtoint ptr %ptr to i64
-; CHECK-NEXT: [[LAST_INT:%.*]] = ptrtoint ptr [[LAST]] to i64
-; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[LAST_INT]], [[FIRST_INT]]
-; CHECK-NEXT: ret i64 [[DIFF]]
-;
- %last = getelementptr i8, ptr %ptr, i32 42
- %first.int = ptrtoint ptr %ptr to i64
- %last.int = ptrtoint ptr %last to i64
- %diff = sub i64 %last.int, %first.int
- ret i64 %diff
-}
-
-define <4 x i32> @ptrdiff4(<4 x ptr> %arg) nounwind {
; Handle simple cases of vectors of pointers.
-; CHECK-LABEL: @ptrdiff4(
-; CHECK: ret <4 x i32> zeroinitializer
+define <4 x i32> @ptrdiff_vectors(<4 x ptr> %arg) nounwind {
+; CHECK-LABEL: @ptrdiff_vectors(
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
;
%p1 = ptrtoint <4 x ptr> %arg to <4 x i32>
%bc = bitcast <4 x ptr> %arg to <4 x ptr>
@@ -63,9 +60,9 @@ define <4 x i32> @ptrdiff4(<4 x ptr> %arg) nounwind {
@global = internal global %struct.ham zeroinitializer, align 4
-define i32 @ptrdiff5() nounwind {
-; CHECK-LABEL: @ptrdiff5(
-; CHECK: bb:
+define i32 @ptrdiff_global() nounwind {
+; CHECK-LABEL: @ptrdiff_global(
+; CHECK-NEXT: bb:
; CHECK-NEXT: ret i32 0
;
bb:
diff --git a/llvm/test/Transforms/LICM/vector-intrinsics.ll b/llvm/test/Transforms/LICM/vector-intrinsics.ll
new file mode 100644
index 0000000..351773e
--- /dev/null
+++ b/llvm/test/Transforms/LICM/vector-intrinsics.ll
@@ -0,0 +1,176 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -passes='loop-mssa(licm)' -verify-memoryssa %s | FileCheck %s
+
+define i32 @reduce_umax(<2 x i32> %inv, i1 %c) {
+; CHECK-LABEL: define i32 @reduce_umax(
+; CHECK-SAME: <2 x i32> [[INV:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[REDUCE_UMAX:%.*]] = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> [[INV]])
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[BACKEDGE_COND:%.*]] = icmp ult i32 [[IV]], [[REDUCE_UMAX]]
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C]], i1 [[BACKEDGE_COND]], i1 false
+; CHECK-NEXT: br i1 [[OR_COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], %[[LOOP]] ]
+; CHECK-NEXT: ret i32 [[IV_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %cond.true ]
+ %iv.next = add i32 %iv, 1
+ br i1 %c, label %cond.true, label %exit
+
+cond.true:
+ %reduce.umax = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> %inv)
+ %backedge.cond = icmp ult i32 %iv, %reduce.umax
+ br i1 %backedge.cond, label %loop, label %exit
+
+exit:
+ ret i32 %iv
+}
+
+define i32 @vp_umax(<2 x i32> %inv.l, <2 x i32> %inv.r, i1 %c) {
+; CHECK-LABEL: define i32 @vp_umax(
+; CHECK-SAME: <2 x i32> [[INV_L:%.*]], <2 x i32> [[INV_R:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[VP_UMAX:%.*]] = call <2 x i32> @llvm.vp.umax.v2i32(<2 x i32> [[INV_L]], <2 x i32> [[INV_R]], <2 x i1> splat (i1 true), i32 2)
+; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x i32> [[VP_UMAX]], i32 0
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[BACKEDGE_COND:%.*]] = icmp ult i32 [[IV]], [[EXTRACT]]
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C]], i1 [[BACKEDGE_COND]], i1 false
+; CHECK-NEXT: br i1 [[OR_COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], %[[LOOP]] ]
+; CHECK-NEXT: ret i32 [[IV_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %cond.true ]
+ %iv.next = add i32 %iv, 1
+ br i1 %c, label %cond.true, label %exit
+
+cond.true:
+ %vp.umax = call <2 x i32> @llvm.vp.umax.v2i32(<2 x i32> %inv.l, <2 x i32> %inv.r, <2 x i1> splat (i1 1), i32 2)
+ %extract = extractelement <2 x i32> %vp.umax, i32 0
+ %backedge.cond = icmp ult i32 %iv, %extract
+ br i1 %backedge.cond, label %loop, label %exit
+
+exit:
+ ret i32 %iv
+}
+
+define i32 @vp_udiv(<2 x i32> %inv.q, <2 x i32> %inv.d, i1 %c) {
+; CHECK-LABEL: define i32 @vp_udiv(
+; CHECK-SAME: <2 x i32> [[INV_Q:%.*]], <2 x i32> [[INV_D:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[COND_TRUE:.*]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: br i1 [[C]], label %[[COND_TRUE]], label %[[EXIT:.*]]
+; CHECK: [[COND_TRUE]]:
+; CHECK-NEXT: [[VP_UDIV:%.*]] = call <2 x i32> @llvm.vp.udiv.v2i32(<2 x i32> [[INV_Q]], <2 x i32> [[INV_D]], <2 x i1> splat (i1 true), i32 2)
+; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x i32> [[VP_UDIV]], i32 0
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV]], [[EXTRACT]]
+; CHECK-NEXT: br i1 [[LOOP_COND]], label %[[LOOP]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], %[[COND_TRUE]] ], [ [[IV]], %[[LOOP]] ]
+; CHECK-NEXT: ret i32 [[IV_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %cond.true ]
+ %iv.next = add i32 %iv, 1
+ br i1 %c, label %cond.true, label %exit
+
+cond.true:
+ %vp.udiv = call <2 x i32> @llvm.vp.udiv.v2i32(<2 x i32> %inv.q, <2 x i32> %inv.d, <2 x i1> splat (i1 1), i32 2)
+ %extract = extractelement <2 x i32> %vp.udiv, i32 0
+ %backedge.cond = icmp ult i32 %iv, %extract
+ br i1 %backedge.cond, label %loop, label %exit
+
+exit:
+ ret i32 %iv
+}
+
+define i32 @vp_load(ptr %inv, i1 %c) {
+; CHECK-LABEL: define i32 @vp_load(
+; CHECK-SAME: ptr [[INV:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[COND_TRUE:.*]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: br i1 [[C]], label %[[COND_TRUE]], label %[[EXIT:.*]]
+; CHECK: [[COND_TRUE]]:
+; CHECK-NEXT: [[VP_LOAD:%.*]] = call <2 x i32> @llvm.vp.load.v2i32.p0(ptr [[INV]], <2 x i1> splat (i1 true), i32 2)
+; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x i32> [[VP_LOAD]], i32 0
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV]], [[EXTRACT]]
+; CHECK-NEXT: br i1 [[LOOP_COND]], label %[[LOOP]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], %[[COND_TRUE]] ], [ [[IV]], %[[LOOP]] ]
+; CHECK-NEXT: ret i32 [[IV_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %cond.true ]
+ %iv.next = add i32 %iv, 1
+ br i1 %c, label %cond.true, label %exit
+
+cond.true:
+ %vp.load = call <2 x i32> @llvm.vp.load.v2i32(ptr %inv, <2 x i1> splat (i1 1), i32 2)
+ %extract = extractelement <2 x i32> %vp.load, i32 0
+ %backedge.cond = icmp ult i32 %iv, %extract
+ br i1 %backedge.cond, label %loop, label %exit
+
+exit:
+ ret i32 %iv
+}
+
+define i32 @vp_store(<2 x i32> %inv.v, ptr %inv.p, i1 %c) {
+; CHECK-LABEL: define i32 @vp_store(
+; CHECK-SAME: <2 x i32> [[INV_V:%.*]], ptr [[INV_P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[COND_TRUE:.*]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: br i1 [[C]], label %[[COND_TRUE]], label %[[EXIT:.*]]
+; CHECK: [[COND_TRUE]]:
+; CHECK-NEXT: call void @llvm.vp.store.v2i32.p0(<2 x i32> [[INV_V]], ptr [[INV_P]], <2 x i1> splat (i1 true), i32 2)
+; CHECK-NEXT: [[BACKEDGE_COND:%.*]] = icmp ult i32 [[IV]], 10
+; CHECK-NEXT: br i1 [[BACKEDGE_COND]], label %[[LOOP]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], %[[COND_TRUE]] ], [ [[IV]], %[[LOOP]] ]
+; CHECK-NEXT: ret i32 [[IV_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %cond.true ]
+ %iv.next = add i32 %iv, 1
+ br i1 %c, label %cond.true, label %exit
+
+cond.true:
+ call void @llvm.vp.store.v2i32(<2 x i32> %inv.v, ptr %inv.p, <2 x i1> splat (i1 1), i32 2)
+ %backedge.cond = icmp ult i32 %iv, 10
+ br i1 %backedge.cond, label %loop, label %exit
+
+exit:
+ ret i32 %iv
+}
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/veclib-function-calls.ll b/llvm/test/Transforms/LoopVectorize/RISCV/veclib-function-calls.ll
index d73900d..83b494a 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/veclib-function-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/veclib-function-calls.ll
@@ -2288,7 +2288,7 @@ define void @tgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
}
;.
; CHECK: attributes #[[ATTR0]] = { "target-features"="+v" }
-; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; CHECK: attributes #[[ATTR2]] = { "vector-function-abi-variant"="_ZGVrNxv_acos(Sleef_acosdx_u10rvvm2)" }
; CHECK: attributes #[[ATTR3]] = { "vector-function-abi-variant"="_ZGVrNxv_acosf(Sleef_acosfx_u10rvvm2)" }
; CHECK: attributes #[[ATTR4]] = { "vector-function-abi-variant"="_ZGVrNxv_acosh(Sleef_acoshdx_u10rvvm2)" }
diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll
index 3500c5c..4fd8d17 100644
--- a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll
@@ -546,19 +546,50 @@ define i64 @loop_guards_needed_to_prove_deref_multiple(i32 %x, i1 %c, ptr derefe
; CHECK-NEXT: call void @llvm.assume(i1 [[PRE_2]])
; CHECK-NEXT: [[N:%.*]] = add i32 [[SEL]], -1
; CHECK-NEXT: [[N_EXT:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SEL]], -2
+; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 2
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
+; CHECK-NEXT: [[IV_NEXT:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[LOOP_HEADER]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[IV_NEXT]]
+; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK: middle.split:
+; CHECK-NEXT: br i1 [[TMP6]], label [[VECTOR_EARLY_EXIT:%.*]], label [[LOOP_LATCH:%.*]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[IV_NEXT]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: vector.early.exit:
+; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true)
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], [[TMP9]]
+; CHECK-NEXT: br label [[EXIT_LOOPEXIT]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT]], [[LOOP_LATCH]] ], [ 0, [[PH]] ]
+; CHECK-NEXT: br label [[LOOP_HEADER1:%.*]]
; CHECK: loop.header:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 0, [[PH]] ]
-; CHECK-NEXT: [[GEP_SRC_I:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[IV_NEXT1:%.*]], [[LOOP_LATCH1:%.*]] ], [ [[IV]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[GEP_SRC_I:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV1]]
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC_I]], align 1
; CHECK-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], 0
-; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_LOOPEXIT:%.*]], label [[LOOP_LATCH]]
+; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_LOOPEXIT]], label [[LOOP_LATCH1]]
; CHECK: loop.latch:
-; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N_EXT]]
-; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP_HEADER]]
+; CHECK-NEXT: [[IV_NEXT1]] = add i64 [[IV1]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV1]], [[N_EXT]]
+; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP_HEADER1]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: exit.loopexit:
-; CHECK-NEXT: [[RES_PH:%.*]] = phi i64 [ [[IV]], [[LOOP_HEADER]] ], [ 0, [[LOOP_LATCH]] ]
+; CHECK-NEXT: [[RES_PH:%.*]] = phi i64 [ [[IV1]], [[LOOP_HEADER1]] ], [ 0, [[LOOP_LATCH1]] ], [ 0, [[LOOP_LATCH]] ], [ [[TMP10]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: [[RES:%.*]] = phi i64 [ -1, [[ENTRY:%.*]] ], [ -2, [[THEN]] ], [ [[RES_PH]], [[EXIT_LOOPEXIT]] ]
@@ -609,4 +640,6 @@ exit:
; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META1]]}
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META2]], [[META1]]}
+; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]}
+; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META2]], [[META1]]}
;.
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll
index 9acc6d6..09f583f 100644
--- a/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll
@@ -39,5 +39,4 @@ declare <4 x float> @llvm.exp.v4f32(<4 x float>) #0
declare <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float>) #0
; CHECK: attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; CHECK-NEXT: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) }
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
diff --git a/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s b/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s
index da83c54..8348c97 100644
--- a/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s
+++ b/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s
@@ -1,10 +1,12 @@
REQUIRES: aarch64-registered-target
-// Flakey on SVE buildbots, disabled pending invesgitation.
+// This will sometimes fail with "Not all operands were initialized by the snippet generator for...".
UNSUPPORTED: target={{.*}}
RUN: llvm-exegesis -mtriple=aarch64 -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%t.obj --opcode-name=FMOVWSr --benchmark-phase=assemble-measured-code 2>&1
RUN: llvm-objdump -d %t.obj > %t.s
RUN: FileCheck %s < %t.s
+// Start matching after the printed file path, as that may contain something that looks like a mnemonic.
+CHECK: Disassembly of section .text:
CHECK-NOT: ld{{[1-4]}}
CHECK-NOT: st{{[1-4]}}