diff options
Diffstat (limited to 'llvm/test')
22 files changed, 1118 insertions, 332 deletions
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll index 8d091a0..d380104 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll @@ -61,7 +61,7 @@ define void @umin(i32 noundef %a, i32 noundef %b) { ; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a) umin (4 * %b))) ; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646 ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a) umin (4 * %b))) -; CHECK-NEXT: Loop %for.body: Trip multiple is 1 +; CHECK-NEXT: Loop %for.body: Trip multiple is 2 ; ; void umin(unsigned a, unsigned b) { ; a *= 2; @@ -157,7 +157,7 @@ define void @smin(i32 noundef %a, i32 noundef %b) { ; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a)<nsw> smin (4 * %b)<nsw>)) ; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646 ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a)<nsw> smin (4 * %b)<nsw>)) -; CHECK-NEXT: Loop %for.body: Trip multiple is 1 +; CHECK-NEXT: Loop %for.body: Trip multiple is 2 ; ; void smin(signed a, signed b) { ; a *= 2; diff --git a/llvm/test/CodeGen/Hexagon/fmul-v67.ll b/llvm/test/CodeGen/Hexagon/fmul-v67.ll index 49098cd..fc0b7f7 100644 --- a/llvm/test/CodeGen/Hexagon/fmul-v67.ll +++ b/llvm/test/CodeGen/Hexagon/fmul-v67.ll @@ -29,7 +29,7 @@ b2: ; CHECK: [[R22]] += dfmpylh([[R20]],[[R21]]) ; CHECK: [[R22]] += dfmpylh([[R21]],[[R20]]) ; CHECK: [[R22]] += dfmpyhh([[R20]],[[R21]]) -define double @test_02(double %a0, double %a1) #2 { +define double @test_02(double %a0, double %a1) #1 { b2: %v3 = fmul double %a0, %a1 ret double %v3 @@ -40,13 +40,11 @@ b2: ; CHECK: [[R30]] += dfmpylh(r1:0,r3:2) ; CHECK: [[R30]] += dfmpylh(r3:2,r1:0) ; CHECK: [[R30]] += dfmpyhh(r1:0,r3:2) -define double @test_03(double %a0, double %a1) #3 { +define double @test_03(double %a0, double %a1) #1 { b2: - %v3 = fmul double %a0, %a1 + %v3 = fmul afn double %a0, %a1 ret double %v3 } attributes #0 = { nounwind } attributes #1 = { nounwind "target-cpu"="hexagonv67" } -attributes #2 = { nounwind "target-cpu"="hexagonv67" "unsafe-fp-math"="false" } -attributes #3 = { nounwind "target-cpu"="hexagonv67" "unsafe-fp-math"="true" } diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-alloc.ll b/llvm/test/CodeGen/NVPTX/tcgen05-alloc.ll index 41a0e81..1edb387 100644 --- a/llvm/test/CodeGen/NVPTX/tcgen05-alloc.ll +++ b/llvm/test/CodeGen/NVPTX/tcgen05-alloc.ll @@ -12,63 +12,104 @@ declare void @llvm.nvvm.tcgen05.alloc.cg2(ptr %addr, i32 %ncols) declare void @llvm.nvvm.tcgen05.alloc.shared.cg1(ptr addrspace(3) %addr, i32 %ncols) declare void @llvm.nvvm.tcgen05.alloc.shared.cg2(ptr addrspace(3) %addr, i32 %ncols) -; CHECK-LABEL: test_tcgen05_alloc -define void @test_tcgen05_alloc(ptr %addr, i32 %ncols) { -; CHECK_PTX64-LABEL: test_tcgen05_alloc( +define void @test_tcgen05_alloc_cg1(ptr %addr, i32 %ncols) { +; CHECK_PTX64-LABEL: test_tcgen05_alloc_cg1( ; CHECK_PTX64: { ; CHECK_PTX64-NEXT: .reg .b32 %r<2>; ; CHECK_PTX64-NEXT: .reg .b64 %rd<2>; ; CHECK_PTX64-EMPTY: ; CHECK_PTX64-NEXT: // %bb.0: -; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_param_0]; -; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_param_1]; +; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg1_param_0]; +; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg1_param_1]; ; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.b32 [%rd1], %r1; -; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1; ; CHECK_PTX64-NEXT: ret; ; -; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc( +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_cg1( ; CHECK_PTX64_SHARED32: { ; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>; ; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>; ; CHECK_PTX64_SHARED32-EMPTY: ; CHECK_PTX64_SHARED32-NEXT: // %bb.0: -; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_param_0]; -; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_param_1]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg1_param_0]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg1_param_1]; ; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.b32 [%rd1], %r1; -; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1; ; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.alloc.cg1(ptr %addr, i32 %ncols) - call void @llvm.nvvm.tcgen05.alloc.cg2(ptr %addr, i32 %ncols) + ret void +} +define void @test_tcgen05_alloc_cg2(ptr %addr, i32 %ncols) { +; CHECK_PTX64-LABEL: test_tcgen05_alloc_cg2( +; CHECK_PTX64: { +; CHECK_PTX64-NEXT: .reg .b32 %r<2>; +; CHECK_PTX64-NEXT: .reg .b64 %rd<2>; +; CHECK_PTX64-EMPTY: +; CHECK_PTX64-NEXT: // %bb.0: +; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg2_param_0]; +; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg2_param_1]; +; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1; +; CHECK_PTX64-NEXT: ret; +; +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_cg2( +; CHECK_PTX64_SHARED32: { +; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>; +; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>; +; CHECK_PTX64_SHARED32-EMPTY: +; CHECK_PTX64_SHARED32-NEXT: // %bb.0: +; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg2_param_0]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg2_param_1]; +; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1; +; CHECK_PTX64_SHARED32-NEXT: ret; + call void @llvm.nvvm.tcgen05.alloc.cg2(ptr %addr, i32 %ncols) ret void } -; CHECK-LABEL: test_tcgen05_alloc_shared -define void @test_tcgen05_alloc_shared(ptr addrspace(3) %addr, i32 %ncols) { -; CHECK_PTX64-LABEL: test_tcgen05_alloc_shared( +define void @test_tcgen05_alloc_shared_cg1(ptr addrspace(3) %addr, i32 %ncols) { +; CHECK_PTX64-LABEL: test_tcgen05_alloc_shared_cg1( ; CHECK_PTX64: { ; CHECK_PTX64-NEXT: .reg .b32 %r<2>; ; CHECK_PTX64-NEXT: .reg .b64 %rd<2>; ; CHECK_PTX64-EMPTY: ; CHECK_PTX64-NEXT: // %bb.0: -; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_shared_param_0]; -; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_param_1]; +; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_shared_cg1_param_0]; +; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg1_param_1]; ; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.shared::cta.b32 [%rd1], %r1; -; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%rd1], %r1; ; CHECK_PTX64-NEXT: ret; ; -; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_shared( +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_shared_cg1( ; CHECK_PTX64_SHARED32: { ; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>; ; CHECK_PTX64_SHARED32-EMPTY: ; CHECK_PTX64_SHARED32-NEXT: // %bb.0: -; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_param_0]; -; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_alloc_shared_param_1]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg1_param_0]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_alloc_shared_cg1_param_1]; ; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.shared::cta.b32 [%r1], %r2; -; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%r1], %r2; ; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.alloc.shared.cg1(ptr addrspace(3) %addr, i32 %ncols) + ret void +} +define void @test_tcgen05_alloc_shared_cg2(ptr addrspace(3) %addr, i32 %ncols) { +; CHECK_PTX64-LABEL: test_tcgen05_alloc_shared_cg2( +; CHECK_PTX64: { +; CHECK_PTX64-NEXT: .reg .b32 %r<2>; +; CHECK_PTX64-NEXT: .reg .b64 %rd<2>; +; CHECK_PTX64-EMPTY: +; CHECK_PTX64-NEXT: // %bb.0: +; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_shared_cg2_param_0]; +; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg2_param_1]; +; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%rd1], %r1; +; CHECK_PTX64-NEXT: ret; +; +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_shared_cg2( +; CHECK_PTX64_SHARED32: { +; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>; +; CHECK_PTX64_SHARED32-EMPTY: +; CHECK_PTX64_SHARED32-NEXT: // %bb.0: +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg2_param_0]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_alloc_shared_cg2_param_1]; +; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%r1], %r2; +; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.alloc.shared.cg2(ptr addrspace(3) %addr, i32 %ncols) ret void } @@ -76,31 +117,50 @@ define void @test_tcgen05_alloc_shared(ptr addrspace(3) %addr, i32 %ncols) { declare void @llvm.nvvm.tcgen05.dealloc.cg1(ptr addrspace(6) %tmem_addr, i32 %ncols) declare void @llvm.nvvm.tcgen05.dealloc.cg2(ptr addrspace(6) %tmem_addr, i32 %ncols) -; CHECK-LABEL: test_tcgen05_dealloc -define void @test_tcgen05_dealloc(ptr addrspace(6) %tmem_addr, i32 %ncols) { -; CHECK_PTX64-LABEL: test_tcgen05_dealloc( +define void @test_tcgen05_dealloc_cg1(ptr addrspace(6) %tmem_addr, i32 %ncols) { +; CHECK_PTX64-LABEL: test_tcgen05_dealloc_cg1( ; CHECK_PTX64: { ; CHECK_PTX64-NEXT: .reg .b32 %r<3>; ; CHECK_PTX64-EMPTY: ; CHECK_PTX64-NEXT: // %bb.0: -; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_param_0]; -; CHECK_PTX64-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_param_1]; +; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg1_param_0]; +; CHECK_PTX64-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg1_param_1]; ; CHECK_PTX64-NEXT: tcgen05.dealloc.cta_group::1.sync.aligned.b32 %r1, %r2; -; CHECK_PTX64-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2; ; CHECK_PTX64-NEXT: ret; ; -; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_dealloc( +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_dealloc_cg1( ; CHECK_PTX64_SHARED32: { ; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>; ; CHECK_PTX64_SHARED32-EMPTY: ; CHECK_PTX64_SHARED32-NEXT: // %bb.0: -; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_param_0]; -; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_param_1]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg1_param_0]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg1_param_1]; ; CHECK_PTX64_SHARED32-NEXT: tcgen05.dealloc.cta_group::1.sync.aligned.b32 %r1, %r2; -; CHECK_PTX64_SHARED32-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2; ; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.dealloc.cg1(ptr addrspace(6) %tmem_addr, i32 %ncols) + ret void +} +define void @test_tcgen05_dealloc_cg2(ptr addrspace(6) %tmem_addr, i32 %ncols) { +; CHECK_PTX64-LABEL: test_tcgen05_dealloc_cg2( +; CHECK_PTX64: { +; CHECK_PTX64-NEXT: .reg .b32 %r<3>; +; CHECK_PTX64-EMPTY: +; CHECK_PTX64-NEXT: // %bb.0: +; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg2_param_0]; +; CHECK_PTX64-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg2_param_1]; +; CHECK_PTX64-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2; +; CHECK_PTX64-NEXT: ret; +; +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_dealloc_cg2( +; CHECK_PTX64_SHARED32: { +; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>; +; CHECK_PTX64_SHARED32-EMPTY: +; CHECK_PTX64_SHARED32-NEXT: // %bb.0: +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg2_param_0]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg2_param_1]; +; CHECK_PTX64_SHARED32-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2; +; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.dealloc.cg2(ptr addrspace(6) %tmem_addr, i32 %ncols) ret void } @@ -108,27 +168,42 @@ define void @test_tcgen05_dealloc(ptr addrspace(6) %tmem_addr, i32 %ncols) { declare void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg1() declare void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg2() -; CHECK-LABEL: test_tcgen05_relinquish_alloc_permit -define void @test_tcgen05_relinquish_alloc_permit() { -; CHECK_PTX64-LABEL: test_tcgen05_relinquish_alloc_permit( +define void @test_tcgen05_relinquish_alloc_permit_cg1() { +; CHECK_PTX64-LABEL: test_tcgen05_relinquish_alloc_permit_cg1( ; CHECK_PTX64: { ; CHECK_PTX64-EMPTY: ; CHECK_PTX64-EMPTY: ; CHECK_PTX64-NEXT: // %bb.0: ; CHECK_PTX64-NEXT: tcgen05.relinquish_alloc_permit.cta_group::1.sync.aligned; -; CHECK_PTX64-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned; ; CHECK_PTX64-NEXT: ret; ; -; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_relinquish_alloc_permit( +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_relinquish_alloc_permit_cg1( ; CHECK_PTX64_SHARED32: { ; CHECK_PTX64_SHARED32-EMPTY: ; CHECK_PTX64_SHARED32-EMPTY: ; CHECK_PTX64_SHARED32-NEXT: // %bb.0: ; CHECK_PTX64_SHARED32-NEXT: tcgen05.relinquish_alloc_permit.cta_group::1.sync.aligned; -; CHECK_PTX64_SHARED32-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned; ; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg1() + ret void +} +define void @test_tcgen05_relinquish_alloc_permit_cg2() { +; CHECK_PTX64-LABEL: test_tcgen05_relinquish_alloc_permit_cg2( +; CHECK_PTX64: { +; CHECK_PTX64-EMPTY: +; CHECK_PTX64-EMPTY: +; CHECK_PTX64-NEXT: // %bb.0: +; CHECK_PTX64-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned; +; CHECK_PTX64-NEXT: ret; +; +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_relinquish_alloc_permit_cg2( +; CHECK_PTX64_SHARED32: { +; CHECK_PTX64_SHARED32-EMPTY: +; CHECK_PTX64_SHARED32-EMPTY: +; CHECK_PTX64_SHARED32-NEXT: // %bb.0: +; CHECK_PTX64_SHARED32-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned; +; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg2() ret void } diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-commit.ll b/llvm/test/CodeGen/NVPTX/tcgen05-commit.ll index 7981feb..2e80c4c 100644 --- a/llvm/test/CodeGen/NVPTX/tcgen05-commit.ll +++ b/llvm/test/CodeGen/NVPTX/tcgen05-commit.ll @@ -11,57 +11,93 @@ declare void @llvm.nvvm.tcgen05.commit.cg2(ptr %bar_addr) declare void @llvm.nvvm.tcgen05.commit.shared.cg1(ptr addrspace(3) %bar_addr) declare void @llvm.nvvm.tcgen05.commit.shared.cg2(ptr addrspace(3) %bar_addr) -; CHECK-LABEL: test_tcgen05_commit -define void @test_tcgen05_commit(ptr %bar_addr) { -; CHECK_PTX64-LABEL: test_tcgen05_commit( +define void @test_tcgen05_commit_cg1(ptr %bar_addr) { +; CHECK_PTX64-LABEL: test_tcgen05_commit_cg1( ; CHECK_PTX64: { ; CHECK_PTX64-NEXT: .reg .b64 %rd<2>; ; CHECK_PTX64-EMPTY: ; CHECK_PTX64-NEXT: // %bb.0: -; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_param_0]; +; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_cg1_param_0]; ; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.b64 [%rd1]; -; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%rd1]; ; CHECK_PTX64-NEXT: ret; ; -; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit( +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_cg1( ; CHECK_PTX64_SHARED32: { ; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>; ; CHECK_PTX64_SHARED32-EMPTY: ; CHECK_PTX64_SHARED32-NEXT: // %bb.0: -; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_param_0]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_cg1_param_0]; ; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.b64 [%rd1]; -; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%rd1]; ; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.commit.cg1(ptr %bar_addr) + ret void +} + +define void @test_tcgen05_commit_cg2(ptr %bar_addr) { +; CHECK_PTX64-LABEL: test_tcgen05_commit_cg2( +; CHECK_PTX64: { +; CHECK_PTX64-NEXT: .reg .b64 %rd<2>; +; CHECK_PTX64-EMPTY: +; CHECK_PTX64-NEXT: // %bb.0: +; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_cg2_param_0]; +; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%rd1]; +; CHECK_PTX64-NEXT: ret; +; +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_cg2( +; CHECK_PTX64_SHARED32: { +; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>; +; CHECK_PTX64_SHARED32-EMPTY: +; CHECK_PTX64_SHARED32-NEXT: // %bb.0: +; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_cg2_param_0]; +; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%rd1]; +; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.commit.cg2(ptr %bar_addr) ret void } -; CHECK-LABEL: test_tcgen05_commit_shared -define void @test_tcgen05_commit_shared(ptr addrspace(3) %bar_addr) { -; CHECK_PTX64-LABEL: test_tcgen05_commit_shared( +define void @test_tcgen05_commit_shared_cg1(ptr addrspace(3) %bar_addr) { +; CHECK_PTX64-LABEL: test_tcgen05_commit_shared_cg1( ; CHECK_PTX64: { ; CHECK_PTX64-NEXT: .reg .b64 %rd<2>; ; CHECK_PTX64-EMPTY: ; CHECK_PTX64-NEXT: // %bb.0: -; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_shared_param_0]; +; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_shared_cg1_param_0]; ; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.b64 [%rd1]; -; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%rd1]; ; CHECK_PTX64-NEXT: ret; ; -; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_shared( +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_shared_cg1( ; CHECK_PTX64_SHARED32: { ; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>; ; CHECK_PTX64_SHARED32-EMPTY: ; CHECK_PTX64_SHARED32-NEXT: // %bb.0: -; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_commit_shared_param_0]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_commit_shared_cg1_param_0]; ; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.b64 [%r1]; -; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%r1]; ; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.commit.shared.cg1(ptr addrspace(3) %bar_addr) + ret void +} + +define void @test_tcgen05_commit_shared_cg2(ptr addrspace(3) %bar_addr) { +; CHECK_PTX64-LABEL: test_tcgen05_commit_shared_cg2( +; CHECK_PTX64: { +; CHECK_PTX64-NEXT: .reg .b64 %rd<2>; +; CHECK_PTX64-EMPTY: +; CHECK_PTX64-NEXT: // %bb.0: +; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_shared_cg2_param_0]; +; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%rd1]; +; CHECK_PTX64-NEXT: ret; +; +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_shared_cg2( +; CHECK_PTX64_SHARED32: { +; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>; +; CHECK_PTX64_SHARED32-EMPTY: +; CHECK_PTX64_SHARED32-NEXT: // %bb.0: +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_commit_shared_cg2_param_0]; +; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%r1]; +; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.commit.shared.cg2(ptr addrspace(3) %bar_addr) ret void @@ -72,66 +108,106 @@ declare void @llvm.nvvm.tcgen05.commit.mc.cg2(ptr %bar_addr, i16 %cta_mask) declare void @llvm.nvvm.tcgen05.commit.mc.shared.cg1(ptr addrspace(3) %bar_addr, i16 %cta_mask) declare void @llvm.nvvm.tcgen05.commit.mc.shared.cg2(ptr addrspace(3) %bar_addr, i16 %cta_mask) -; CHECK-LABEL: test_tcgen05_commit_mc -define void @test_tcgen05_commit_mc(ptr %bar_addr, i16 %cta_mask) { -; CHECK_PTX64-LABEL: test_tcgen05_commit_mc( +define void @test_tcgen05_commit_mc_cg1(ptr %bar_addr, i16 %cta_mask) { +; CHECK_PTX64-LABEL: test_tcgen05_commit_mc_cg1( ; CHECK_PTX64: { ; CHECK_PTX64-NEXT: .reg .b16 %rs<2>; ; CHECK_PTX64-NEXT: .reg .b64 %rd<2>; ; CHECK_PTX64-EMPTY: ; CHECK_PTX64-NEXT: // %bb.0: -; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_param_0]; -; CHECK_PTX64-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_param_1]; +; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_cg1_param_0]; +; CHECK_PTX64-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_cg1_param_1]; ; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1; -; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1; ; CHECK_PTX64-NEXT: ret; ; -; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_mc( +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_mc_cg1( ; CHECK_PTX64_SHARED32: { ; CHECK_PTX64_SHARED32-NEXT: .reg .b16 %rs<2>; ; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>; ; CHECK_PTX64_SHARED32-EMPTY: ; CHECK_PTX64_SHARED32-NEXT: // %bb.0: -; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_param_0]; -; CHECK_PTX64_SHARED32-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_param_1]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_cg1_param_0]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_cg1_param_1]; ; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1; -; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1; ; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.commit.mc.cg1(ptr %bar_addr, i16 %cta_mask) + ret void +} +define void @test_tcgen05_commit_mc_cg2(ptr %bar_addr, i16 %cta_mask) { +; CHECK_PTX64-LABEL: test_tcgen05_commit_mc_cg2( +; CHECK_PTX64: { +; CHECK_PTX64-NEXT: .reg .b16 %rs<2>; +; CHECK_PTX64-NEXT: .reg .b64 %rd<2>; +; CHECK_PTX64-EMPTY: +; CHECK_PTX64-NEXT: // %bb.0: +; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_cg2_param_0]; +; CHECK_PTX64-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_cg2_param_1]; +; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1; +; CHECK_PTX64-NEXT: ret; +; +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_mc_cg2( +; CHECK_PTX64_SHARED32: { +; CHECK_PTX64_SHARED32-NEXT: .reg .b16 %rs<2>; +; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>; +; CHECK_PTX64_SHARED32-EMPTY: +; CHECK_PTX64_SHARED32-NEXT: // %bb.0: +; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_cg2_param_0]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_cg2_param_1]; +; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1; +; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.commit.mc.cg2(ptr %bar_addr, i16 %cta_mask) - ret void } -; CHECK-LABEL: test_tcgen05_commit_mc_shared -define void @test_tcgen05_commit_mc_shared(ptr addrspace(3) %bar_addr, i16 %cta_mask) { -; CHECK_PTX64-LABEL: test_tcgen05_commit_mc_shared( +define void @test_tcgen05_commit_mc_shared_cg1(ptr addrspace(3) %bar_addr, i16 %cta_mask) { +; CHECK_PTX64-LABEL: test_tcgen05_commit_mc_shared_cg1( ; CHECK_PTX64: { ; CHECK_PTX64-NEXT: .reg .b16 %rs<2>; ; CHECK_PTX64-NEXT: .reg .b64 %rd<2>; ; CHECK_PTX64-EMPTY: ; CHECK_PTX64-NEXT: // %bb.0: -; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_shared_param_0]; -; CHECK_PTX64-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_shared_param_1]; +; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_shared_cg1_param_0]; +; CHECK_PTX64-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_shared_cg1_param_1]; ; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1; -; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1; ; CHECK_PTX64-NEXT: ret; ; -; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_mc_shared( +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_mc_shared_cg1( ; CHECK_PTX64_SHARED32: { ; CHECK_PTX64_SHARED32-NEXT: .reg .b16 %rs<2>; ; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>; ; CHECK_PTX64_SHARED32-EMPTY: ; CHECK_PTX64_SHARED32-NEXT: // %bb.0: -; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_commit_mc_shared_param_0]; -; CHECK_PTX64_SHARED32-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_shared_param_1]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_commit_mc_shared_cg1_param_0]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_shared_cg1_param_1]; ; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%r1], %rs1; -; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%r1], %rs1; ; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.commit.mc.shared.cg1(ptr addrspace(3) %bar_addr, i16 %cta_mask) + ret void +} +define void @test_tcgen05_commit_mc_shared_cg2(ptr addrspace(3) %bar_addr, i16 %cta_mask) { +; CHECK_PTX64-LABEL: test_tcgen05_commit_mc_shared_cg2( +; CHECK_PTX64: { +; CHECK_PTX64-NEXT: .reg .b16 %rs<2>; +; CHECK_PTX64-NEXT: .reg .b64 %rd<2>; +; CHECK_PTX64-EMPTY: +; CHECK_PTX64-NEXT: // %bb.0: +; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_shared_cg2_param_0]; +; CHECK_PTX64-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_shared_cg2_param_1]; +; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1; +; CHECK_PTX64-NEXT: ret; +; +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_mc_shared_cg2( +; CHECK_PTX64_SHARED32: { +; CHECK_PTX64_SHARED32-NEXT: .reg .b16 %rs<2>; +; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>; +; CHECK_PTX64_SHARED32-EMPTY: +; CHECK_PTX64_SHARED32-NEXT: // %bb.0: +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_commit_mc_shared_cg2_param_0]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_shared_cg2_param_1]; +; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%r1], %rs1; +; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.commit.mc.shared.cg2(ptr addrspace(3) %bar_addr, i16 %cta_mask) - ret void } diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-cp.ll b/llvm/test/CodeGen/NVPTX/tcgen05-cp.ll index c540f78..817b1d5 100644 --- a/llvm/test/CodeGen/NVPTX/tcgen05-cp.ll +++ b/llvm/test/CodeGen/NVPTX/tcgen05-cp.ll @@ -4,346 +4,580 @@ ; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | %ptxas-verify -arch=sm_100a %} ; RUN: %if ptxas-sm_103a && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | %ptxas-verify -arch=sm_103a %} -; CHECK-LABEL: test_tcgen05_cp_64x128_v1 -define void @test_tcgen05_cp_64x128_v1(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_64x128_v1( +define void @test_tcgen05_cp_64x128_v1_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_64x128_v1_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.64x128b.warpx2::02_13 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::02_13 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_02_13.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_64x128_v1_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_64x128_v1_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::02_13 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_02_13.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_64x128_v2 -define void @test_tcgen05_cp_64x128_v2(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_64x128_v2( +define void @test_tcgen05_cp_64x128_v2_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_64x128_v2_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.64x128b.warpx2::01_23 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::01_23 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_01_23.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_64x128_v2_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_64x128_v2_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::01_23 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_01_23.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_32x128 -define void @test_tcgen05_cp_32x128(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_32x128( +define void @test_tcgen05_cp_32x128_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_32x128_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.32x128b.warpx4 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.32x128b.warpx4 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_32x128_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_32x128_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.32x128b.warpx4 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_128x128b -define void @test_tcgen05_cp_128x128b(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_128x128b( +define void @test_tcgen05_cp_128x128b_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_128x128b_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.128x128b [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.128x128b [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.128x128b.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_128x128b_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_128x128b_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.128x128b [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.128x128b.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_128x256b -define void @test_tcgen05_cp_128x256b(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_128x256b( +define void @test_tcgen05_cp_128x256b_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_128x256b_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.128x256b [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.128x256b [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.128x256b.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_128x256b_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_128x256b_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.128x256b [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.128x256b.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_4x256b -define void @test_tcgen05_cp_4x256b(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_4x256b( +define void @test_tcgen05_cp_4x256b_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_4x256b_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.4x256b [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.4x256b [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.4x256b.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_4x256b_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_4x256b_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.4x256b [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.4x256b.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } ; With src_fmt as b6x16_p32 -; CHECK-LABEL: test_tcgen05_cp_128x256b_b6x16_p32 -define void @test_tcgen05_cp_128x256b_b6x16_p32(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_128x256b_b6x16_p32( +define void @test_tcgen05_cp_128x256b_b6x16_p32_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_128x256b_b6x16_p32_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_b6x16_p32_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_b6x16_p32_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_b6x16_p32_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_b6x16_p32_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.128x256b.b8x16.b6x16_p32 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.128x256b.b8x16.b6x16_p32 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.128x256b.b6x16_p32.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_128x256b_b6x16_p32_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_128x256b_b6x16_p32_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_b6x16_p32_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_b6x16_p32_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.128x256b.b8x16.b6x16_p32 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.128x256b.b6x16_p32.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_4x256b_b6x16_p32 -define void @test_tcgen05_cp_4x256b_b6x16_p32(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_4x256b_b6x16_p32( +define void @test_tcgen05_cp_4x256b_b6x16_p32_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_4x256b_b6x16_p32_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_b6x16_p32_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_b6x16_p32_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_b6x16_p32_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_b6x16_p32_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.4x256b.b8x16.b6x16_p32 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.4x256b.b8x16.b6x16_p32 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.4x256b.b6x16_p32.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_4x256b_b6x16_p32_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_4x256b_b6x16_p32_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_b6x16_p32_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_b6x16_p32_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.4x256b.b8x16.b6x16_p32 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.4x256b.b6x16_p32.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_128x128b_b6x16_p32 -define void @test_tcgen05_cp_128x128b_b6x16_p32(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_128x128b_b6x16_p32( +define void @test_tcgen05_cp_128x128b_b6x16_p32_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_128x128b_b6x16_p32_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_b6x16_p32_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_b6x16_p32_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_b6x16_p32_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_b6x16_p32_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.128x128b.b8x16.b6x16_p32 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.128x128b.b8x16.b6x16_p32 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.128x128b.b6x16_p32.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_128x128b_b6x16_p32_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_128x128b_b6x16_p32_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_b6x16_p32_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_b6x16_p32_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.128x128b.b8x16.b6x16_p32 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.128x128b.b6x16_p32.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b6x16_p32 -define void @test_tcgen05_cp_64x128_v1_b6x16_p32(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b6x16_p32( +define void @test_tcgen05_cp_64x128_v1_b6x16_p32_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b6x16_p32_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_b6x16_p32_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_b6x16_p32_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_b6x16_p32_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_b6x16_p32_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.64x128b.warpx2::02_13.b8x16.b6x16_p32 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::02_13.b8x16.b6x16_p32 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_02_13.b6x16_p32.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_64x128_v1_b6x16_p32_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b6x16_p32_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_b6x16_p32_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_b6x16_p32_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::02_13.b8x16.b6x16_p32 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_02_13.b6x16_p32.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b6x16_p32 -define void @test_tcgen05_cp_64x128_v2_b6x16_p32(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b6x16_p32( +define void @test_tcgen05_cp_64x128_v2_b6x16_p32_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b6x16_p32_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_b6x16_p32_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_b6x16_p32_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_b6x16_p32_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_b6x16_p32_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.64x128b.warpx2::01_23.b8x16.b6x16_p32 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::01_23.b8x16.b6x16_p32 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_01_23.b6x16_p32.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_64x128_v2_b6x16_p32_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b6x16_p32_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_b6x16_p32_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_b6x16_p32_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::01_23.b8x16.b6x16_p32 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_01_23.b6x16_p32.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_32x128_b6x16_p32 -define void @test_tcgen05_cp_32x128_b6x16_p32(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_32x128_b6x16_p32( +define void @test_tcgen05_cp_32x128_b6x16_p32_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_32x128_b6x16_p32_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_b6x16_p32_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_b6x16_p32_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_b6x16_p32_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_b6x16_p32_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.32x128b.warpx4.b8x16.b6x16_p32 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.32x128b.warpx4.b8x16.b6x16_p32 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.b6x16_p32.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_32x128_b6x16_p32_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_32x128_b6x16_p32_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_b6x16_p32_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_b6x16_p32_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.32x128b.warpx4.b8x16.b6x16_p32 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.b6x16_p32.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } ; With src_fmt as b4x16_p64 -; CHECK-LABEL: test_tcgen05_cp_128x256b_b4x16_p64 -define void @test_tcgen05_cp_128x256b_b4x16_p64(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_128x256b_b4x16_p64( +define void @test_tcgen05_cp_128x256b_b4x16_p64_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_128x256b_b4x16_p64_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_b4x16_p64_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_b4x16_p64_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_b4x16_p64_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_b4x16_p64_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.128x256b.b8x16.b4x16_p64 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.128x256b.b8x16.b4x16_p64 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.128x256b.b4x16_p64.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_128x256b_b4x16_p64_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_128x256b_b4x16_p64_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_b4x16_p64_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_b4x16_p64_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.128x256b.b8x16.b4x16_p64 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.128x256b.b4x16_p64.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_4x256b_b4x16_p64 -define void @test_tcgen05_cp_4x256b_b4x16_p64(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_4x256b_b4x16_p64( +define void @test_tcgen05_cp_4x256b_b4x16_p64_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_4x256b_b4x16_p64_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_b4x16_p64_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_b4x16_p64_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_b4x16_p64_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_b4x16_p64_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.4x256b.b8x16.b4x16_p64 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.4x256b.b8x16.b4x16_p64 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.4x256b.b4x16_p64.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_4x256b_b4x16_p64_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_4x256b_b4x16_p64_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_b4x16_p64_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_b4x16_p64_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.4x256b.b8x16.b4x16_p64 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.4x256b.b4x16_p64.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_128x128b_b4x16_p64 -define void @test_tcgen05_cp_128x128b_b4x16_p64(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_128x128b_b4x16_p64( +define void @test_tcgen05_cp_128x128b_b4x16_p64_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_128x128b_b4x16_p64_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_b4x16_p64_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_b4x16_p64_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_b4x16_p64_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_b4x16_p64_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.128x128b.b8x16.b4x16_p64 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.128x128b.b8x16.b4x16_p64 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.128x128b.b4x16_p64.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_128x128b_b4x16_p64_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_128x128b_b4x16_p64_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_b4x16_p64_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_b4x16_p64_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.128x128b.b8x16.b4x16_p64 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.128x128b.b4x16_p64.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b4x16_p64 -define void @test_tcgen05_cp_64x128_v1_b4x16_p64(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b4x16_p64( +define void @test_tcgen05_cp_64x128_v1_b4x16_p64_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b4x16_p64_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_b4x16_p64_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_b4x16_p64_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_b4x16_p64_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_b4x16_p64_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.64x128b.warpx2::02_13.b8x16.b4x16_p64 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::02_13.b8x16.b4x16_p64 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_02_13.b4x16_p64.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_64x128_v1_b4x16_p64_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b4x16_p64_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_b4x16_p64_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_b4x16_p64_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::02_13.b8x16.b4x16_p64 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_02_13.b4x16_p64.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b4x16_p64 -define void @test_tcgen05_cp_64x128_v2_b4x16_p64(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b4x16_p64( +define void @test_tcgen05_cp_64x128_v2_b4x16_p64_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b4x16_p64_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_b4x16_p64_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_b4x16_p64_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_b4x16_p64_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_b4x16_p64_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.64x128b.warpx2::01_23.b8x16.b4x16_p64 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::01_23.b8x16.b4x16_p64 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_01_23.b4x16_p64.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_64x128_v2_b4x16_p64_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b4x16_p64_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_b4x16_p64_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_b4x16_p64_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::01_23.b8x16.b4x16_p64 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_01_23.b4x16_p64.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_32x128_b4x16_p64 -define void @test_tcgen05_cp_32x128_b4x16_p64(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_32x128_b4x16_p64( +define void @test_tcgen05_cp_32x128_b4x16_p64_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_32x128_b4x16_p64_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_b4x16_p64_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_b4x16_p64_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_b4x16_p64_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_b4x16_p64_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.32x128b.warpx4.b8x16.b4x16_p64 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.32x128b.warpx4.b8x16.b4x16_p64 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.b4x16_p64.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_32x128_b4x16_p64_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_32x128_b4x16_p64_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_b4x16_p64_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_b4x16_p64_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.32x128b.warpx4.b8x16.b4x16_p64 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.b4x16_p64.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-shift.ll b/llvm/test/CodeGen/NVPTX/tcgen05-shift.ll index 8ca6a2a0..bf2adac 100644 --- a/llvm/test/CodeGen/NVPTX/tcgen05-shift.ll +++ b/llvm/test/CodeGen/NVPTX/tcgen05-shift.ll @@ -7,18 +7,29 @@ declare void @llvm.nvvm.tcgen05.shift.down.cg1(ptr addrspace(6) %tmem_addr) declare void @llvm.nvvm.tcgen05.shift.down.cg2(ptr addrspace(6) %tmem_addr) -; CHECK-LABEL: test_tcgen05_shift -define void @test_tcgen05_shift(ptr addrspace(6) %tmem_addr) { -; CHECK-LABEL: test_tcgen05_shift( +define void @test_tcgen05_shift_cg1(ptr addrspace(6) %tmem_addr) { +; CHECK-LABEL: test_tcgen05_shift_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_shift_param_0]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_shift_cg1_param_0]; ; CHECK-NEXT: tcgen05.shift.cta_group::1.down [%r1]; -; CHECK-NEXT: tcgen05.shift.cta_group::2.down [%r1]; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.shift.down.cg1(ptr addrspace(6) %tmem_addr) + + ret void +} + +define void @test_tcgen05_shift_cg2(ptr addrspace(6) %tmem_addr) { +; CHECK-LABEL: test_tcgen05_shift_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_shift_cg2_param_0]; +; CHECK-NEXT: tcgen05.shift.cta_group::2.down [%r1]; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.shift.down.cg2(ptr addrspace(6) %tmem_addr) ret void diff --git a/llvm/test/CodeGen/RISCV/rv64zbkb.ll b/llvm/test/CodeGen/RISCV/rv64zbkb.ll index 4537d18..b2ad8d7 100644 --- a/llvm/test/CodeGen/RISCV/rv64zbkb.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbkb.ll @@ -441,7 +441,7 @@ define void @pack_lo_packh_hi_packh_2(i8 zeroext %0, i8 zeroext %1, i8 zeroext % ; RV64ZBKB-LABEL: pack_lo_packh_hi_packh_2: ; RV64ZBKB: # %bb.0: ; RV64ZBKB-NEXT: packh a0, a0, a1 -; RV64ZBKB-NEXT: packh a1, a3, a2 +; RV64ZBKB-NEXT: packh a1, a2, a3 ; RV64ZBKB-NEXT: packw a0, a0, a1 ; RV64ZBKB-NEXT: sw a0, 0(a4) ; RV64ZBKB-NEXT: ret @@ -477,7 +477,7 @@ define void @pack_lo_packh_hi_packh_3(i8 %0, i8 %1, i8 %2, i8 %3, ptr %p) nounwi ; RV64ZBKB-LABEL: pack_lo_packh_hi_packh_3: ; RV64ZBKB: # %bb.0: ; RV64ZBKB-NEXT: packh a0, a0, a1 -; RV64ZBKB-NEXT: packh a1, a3, a2 +; RV64ZBKB-NEXT: packh a1, a2, a3 ; RV64ZBKB-NEXT: packw a0, a0, a1 ; RV64ZBKB-NEXT: sw a0, 0(a4) ; RV64ZBKB-NEXT: ret @@ -509,7 +509,7 @@ define i32 @pack_lo_packh_hi_packh_4(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2 ; RV64ZBKB-LABEL: pack_lo_packh_hi_packh_4: ; RV64ZBKB: # %bb.0: ; RV64ZBKB-NEXT: packh a0, a0, a1 -; RV64ZBKB-NEXT: packh a1, a3, a2 +; RV64ZBKB-NEXT: packh a1, a2, a3 ; RV64ZBKB-NEXT: packw a0, a0, a1 ; RV64ZBKB-NEXT: ret %a = zext i8 %0 to i32 diff --git a/llvm/test/Instrumentation/AddressSanitizer/alloca-offset-lifetime.ll b/llvm/test/Instrumentation/AddressSanitizer/alloca-offset-lifetime.ll deleted file mode 100644 index a4846176..0000000 --- a/llvm/test/Instrumentation/AddressSanitizer/alloca-offset-lifetime.ll +++ /dev/null @@ -1,27 +0,0 @@ -; Test that ASAN will not instrument lifetime markers on alloca offsets. -; -; RUN: opt < %s -passes=asan --asan-use-after-scope -S | FileCheck %s - -target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.15.0" - -%t = type { ptr, ptr, %sub, i64 } -%sub = type { i32 } - -define void @foo() sanitize_address { -entry: - %0 = alloca %t, align 8 - %x = getelementptr inbounds %t, ptr %0, i64 0, i32 2 - call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x) - call void @bar(ptr nonnull %x) - call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x) #3 - ret void -} - -declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) -declare void @bar(ptr) -declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) - -; CHECK: store i64 %[[STACK_BASE:.+]], ptr %asan_local_stack_base, align 8 -; CHECK-NOT: store i8 0 -; CHECK: call void @bar(ptr nonnull %x) diff --git a/llvm/test/Instrumentation/AddressSanitizer/calls-only-smallfn.ll b/llvm/test/Instrumentation/AddressSanitizer/calls-only-smallfn.ll index 0859a7e..d7204e6 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/calls-only-smallfn.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/calls-only-smallfn.ll @@ -9,15 +9,15 @@ define void @foo() #0 { entry: %array01 = alloca [1 x i8], align 1 %array02 = alloca [2 x i8], align 1 -; OUTLINE: call void @__asan_set_shadow_f1(i64 %23, i64 4) -; OUTLINE: call void @__asan_set_shadow_01(i64 %24, i64 1) -; OUTLINE: call void @__asan_set_shadow_f2(i64 %25, i64 1) -; OUTLINE: call void @__asan_set_shadow_02(i64 %26, i64 1) -; OUTLINE: call void @__asan_set_shadow_f3(i64 %27, i64 1) -; OUTLINE: call void @__asan_stack_free_0(i64 %7, i64 64) -; OUTLINE: call void @__asan_set_shadow_00(i64 %55, i64 8) -; INLINE: store i64 -935919682371587599, ptr %24, align 1 -; INLINE: store i64 -723401728380766731, ptr %52, align 1 +; OUTLINE: call void @__asan_set_shadow_f1(i64 %{{.+}}, i64 4) +; OUTLINE: call void @__asan_set_shadow_01(i64 %{{.+}}, i64 1) +; OUTLINE: call void @__asan_set_shadow_f2(i64 %{{.+}}, i64 1) +; OUTLINE: call void @__asan_set_shadow_02(i64 %{{.+}}, i64 1) +; OUTLINE: call void @__asan_set_shadow_f3(i64 %{{.+}}, i64 1) +; OUTLINE: call void @__asan_stack_free_0(i64 %{{.+}}, i64 64) +; OUTLINE: call void @__asan_set_shadow_00(i64 %{{.+}}, i64 8) +; INLINE: store i64 -935919682371587599, ptr %{{.+}}, align 1 +; INLINE: store i64 -723401728380766731, ptr %{{.+}}, align 1 %arrayidx = getelementptr inbounds [1 x i8], ptr %array01, i64 0, i64 1 store i8 1, ptr %arrayidx, align 1 %arrayidx1 = getelementptr inbounds [2 x i8], ptr %array02, i64 0, i64 2 diff --git a/llvm/test/Instrumentation/AddressSanitizer/calls-only.ll b/llvm/test/Instrumentation/AddressSanitizer/calls-only.ll index 5f122ad..6f52289 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/calls-only.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/calls-only.ll @@ -14,26 +14,26 @@ entry: %array05 = alloca [5 x i8], align 1 %array06 = alloca [6 x i8], align 1 %array07 = alloca [7 x i8], align 1 -; OUTLINE: call void @__asan_set_shadow_f1(i64 %33, i64 4) -; OUTLINE: call void @__asan_set_shadow_01(i64 %34, i64 1) -; OUTLINE: call void @__asan_set_shadow_f2(i64 %35, i64 1) -; OUTLINE: call void @__asan_set_shadow_02(i64 %36, i64 1) -; OUTLINE: call void @__asan_set_shadow_f2(i64 %37, i64 1) -; OUTLINE: call void @__asan_set_shadow_03(i64 %38, i64 1) -; OUTLINE: call void @__asan_set_shadow_f2(i64 %39, i64 1) -; OUTLINE: call void @__asan_set_shadow_04(i64 %40, i64 1) -; OUTLINE: call void @__asan_set_shadow_f2(i64 %41, i64 1) -; OUTLINE: call void @__asan_set_shadow_05(i64 %42, i64 1) -; OUTLINE: call void @__asan_set_shadow_f2(i64 %43, i64 3) -; OUTLINE: call void @__asan_set_shadow_06(i64 %44, i64 1) -; OUTLINE: call void @__asan_set_shadow_f2(i64 %45, i64 3) -; OUTLINE: call void @__asan_set_shadow_07(i64 %46, i64 1) -; OUTLINE: call void @__asan_set_shadow_f3(i64 %47, i64 3) -; OUTLINE: call void @__asan_stack_free_2(i64 %7, i64 192) -; OUTLINE: call void @__asan_set_shadow_00(i64 %135, i64 24) -; INLINE: store i64 -1007977276409515535, ptr %34, align 1 -; INLINE: store i64 -940423264817843709, ptr %36, align 1 -; INLINE: store i64 -868083087686045178, ptr %38, align 1 +; OUTLINE: call void @__asan_set_shadow_f1(i64 %{{.+}}, i64 4) +; OUTLINE: call void @__asan_set_shadow_01(i64 %{{.+}}, i64 1) +; OUTLINE: call void @__asan_set_shadow_f2(i64 %{{.+}}, i64 1) +; OUTLINE: call void @__asan_set_shadow_02(i64 %{{.+}}, i64 1) +; OUTLINE: call void @__asan_set_shadow_f2(i64 %{{.+}}, i64 1) +; OUTLINE: call void @__asan_set_shadow_03(i64 %{{.+}}, i64 1) +; OUTLINE: call void @__asan_set_shadow_f2(i64 %{{.+}}, i64 1) +; OUTLINE: call void @__asan_set_shadow_04(i64 %{{.+}}, i64 1) +; OUTLINE: call void @__asan_set_shadow_f2(i64 %{{.+}}, i64 1) +; OUTLINE: call void @__asan_set_shadow_05(i64 %{{.+}}, i64 1) +; OUTLINE: call void @__asan_set_shadow_f2(i64 %{{.+}}, i64 3) +; OUTLINE: call void @__asan_set_shadow_06(i64 %{{.+}}, i64 1) +; OUTLINE: call void @__asan_set_shadow_f2(i64 %{{.+}}, i64 3) +; OUTLINE: call void @__asan_set_shadow_07(i64 %{{.+}}, i64 1) +; OUTLINE: call void @__asan_set_shadow_f3(i64 %{{.+}}, i64 3) +; OUTLINE: call void @__asan_stack_free_2(i64 %{{.+}}, i64 192) +; OUTLINE: call void @__asan_set_shadow_00(i64 %{{.+}}, i64 24) +; INLINE: store i64 -1007977276409515535, ptr %{{.+}}, align 1 +; INLINE: store i64 -940423264817843709, ptr %{{.+}}, align 1 +; INLINE: store i64 -868083087686045178, ptr %{{.+}}, align 1 %arrayidx = getelementptr inbounds [1 x i8], ptr %array01, i64 0, i64 1 store i8 1, ptr %arrayidx, align 1 %arrayidx1 = getelementptr inbounds [2 x i8], ptr %array02, i64 0, i64 2 @@ -48,7 +48,7 @@ entry: store i8 6, ptr %arrayidx5, align 1 %arrayidx6 = getelementptr inbounds [7 x i8], ptr %array07, i64 0, i64 7 store i8 7, ptr %arrayidx6, align 1 -; CHECK-NOT: store i64 -723401728380766731, ptr %126, align 1 +; CHECK-NOT: store i64 -723401728380766731, ptr %{{.+}}, align 1 ret void } attributes #0 = { noinline nounwind optnone sanitize_address ssp uwtable(sync) "frame-pointer"="non-leaf" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+crypto,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a" } diff --git a/llvm/test/Instrumentation/SanitizerCoverage/missing_dbg.ll b/llvm/test/Instrumentation/SanitizerCoverage/missing_dbg.ll index 3568434..07b9a1c 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/missing_dbg.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/missing_dbg.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt < %s -passes='module(sancov-module)' -sanitizer-coverage-level=2 -S | FileCheck %s +; RUN: opt < %s -passes='module(sancov-module)' -sanitizer-coverage-level=1 -sanitizer-coverage-stack-depth -sanitizer-coverage-stack-depth-callback-min=1 -S | FileCheck %s --check-prefix=CHECK-STACK-CALLBACK +; RUN: opt < %s -passes='module(sancov-module)' -sanitizer-coverage-level=1 -sanitizer-coverage-stack-depth -S | FileCheck %s --check-prefix=CHECK-STACK-DEPTH target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -55,6 +57,86 @@ entry: ret i32 %t } +define i32 @with_dbg_stack_callback(ptr %a) !dbg !8 { +; CHECK-STACK-CALLBACK-LABEL: define i32 @with_dbg_stack_callback( +; CHECK-STACK-CALLBACK-SAME: ptr [[A:%.*]]) !dbg [[DBG8:![0-9]+]] { +; CHECK-STACK-CALLBACK-NEXT: entry: +; CHECK-STACK-CALLBACK-NEXT: [[BUF:%.*]] = alloca [64 x i8], align 1 +; CHECK-STACK-CALLBACK-NEXT: call void @__sanitizer_cov_stack_depth() #[[ATTR1:[0-9]+]], !dbg [[DBG9:![0-9]+]] +; CHECK-STACK-CALLBACK-NEXT: %t = load i32, ptr [[A]], align 4 +; CHECK-STACK-CALLBACK-NEXT: call void @external_func() +; CHECK-STACK-CALLBACK-NEXT: ret i32 %t +; +entry: + %buf = alloca [64 x i8], align 1 + %t = load i32, ptr %a, align 4 + call void @external_func() + ret i32 %t +} + +define i32 @with_dbg_stack_depth(ptr %a) !dbg !10 { +; CHECK-STACK-DEPTH-LABEL: define i32 @with_dbg_stack_depth( +; CHECK-STACK-DEPTH-SAME: ptr [[A:%.*]]) !dbg [[DBG10:![0-9]+]] { +; CHECK-STACK-DEPTH-NEXT: entry: +; CHECK-STACK-DEPTH-NEXT: [[BUF:%.*]] = alloca [64 x i8], align 1 +; CHECK-STACK-DEPTH-NEXT: [[TMP1:%.*]] = call ptr @llvm.frameaddress.p0(i32 0) +; CHECK-STACK-DEPTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64 +; CHECK-STACK-DEPTH-NEXT: [[TMP3:%.*]] = load i64, ptr @__sancov_lowest_stack, align 8 +; CHECK-STACK-DEPTH-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP2]], [[TMP3]] +; CHECK-STACK-DEPTH-NEXT: br i1 [[TMP4]], label {{%.*}}, label {{%.*}} +; CHECK-STACK-DEPTH: store i64 [[TMP2]], ptr @__sancov_lowest_stack, align 8, !dbg [[DBG11:![0-9]+]], {{.*}}!nosanitize +; CHECK-STACK-DEPTH: %t = load i32, ptr [[A]], align 4 +; CHECK-STACK-DEPTH-NEXT: call void @external_func() +; CHECK-STACK-DEPTH-NEXT: ret i32 %t +; +entry: + %buf = alloca [64 x i8], align 1 + %t = load i32, ptr %a, align 4 + call void @external_func() + ret i32 %t +} + +define i32 @without_dbg_stack_callback(ptr %a) { +; CHECK-STACK-CALLBACK-LABEL: define i32 @without_dbg_stack_callback( +; CHECK-STACK-CALLBACK-SAME: ptr [[A:%.*]]) { +; CHECK-STACK-CALLBACK-NEXT: entry: +; CHECK-STACK-CALLBACK-NEXT: [[BUF:%.*]] = alloca [64 x i8], align 1 +; CHECK-STACK-CALLBACK-NEXT: call void @__sanitizer_cov_stack_depth() #[[ATTR1]] +; CHECK-STACK-CALLBACK-NEXT: %t = load i32, ptr [[A]], align 4 +; CHECK-STACK-CALLBACK-NEXT: call void @external_func() +; CHECK-STACK-CALLBACK-NEXT: ret i32 %t +; +entry: + %buf = alloca [64 x i8], align 1 + %t = load i32, ptr %a, align 4 + call void @external_func() + ret i32 %t +} + +define i32 @without_dbg_stack_depth(ptr %a) { +; CHECK-STACK-DEPTH-LABEL: define i32 @without_dbg_stack_depth( +; CHECK-STACK-DEPTH-SAME: ptr [[A:%.*]]) { +; CHECK-STACK-DEPTH-NEXT: entry: +; CHECK-STACK-DEPTH-NEXT: [[BUF:%.*]] = alloca [64 x i8], align 1 +; CHECK-STACK-DEPTH-NEXT: [[TMP1:%.*]] = call ptr @llvm.frameaddress.p0(i32 0) +; CHECK-STACK-DEPTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64 +; CHECK-STACK-DEPTH-NEXT: [[TMP3:%.*]] = load i64, ptr @__sancov_lowest_stack, align 8 +; CHECK-STACK-DEPTH-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP2]], [[TMP3]] +; CHECK-STACK-DEPTH-NEXT: br i1 [[TMP4]], label {{%.*}}, label {{%.*}} +; CHECK-STACK-DEPTH: store i64 [[TMP2]], ptr @__sancov_lowest_stack, align 8, {{.*}}!nosanitize +; CHECK-STACK-DEPTH: %t = load i32, ptr [[A]], align 4 +; CHECK-STACK-DEPTH-NEXT: call void @external_func() +; CHECK-STACK-DEPTH-NEXT: ret i32 %t +; +entry: + %buf = alloca [64 x i8], align 1 + %t = load i32, ptr %a, align 4 + call void @external_func() + ret i32 %t +} + +declare void @external_func() + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!2} @@ -66,6 +148,10 @@ entry: !5 = !{} !6 = !DILocation(line: 192, scope: !3) !7 = !DILocation(line: 0, scope: !3) +!8 = distinct !DISubprogram(name: "with_dbg_stack_callback", scope: !1, file: !1, line: 200, type: !4, scopeLine: 200, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!9 = !DILocation(line: 200, scope: !8) +!10 = distinct !DISubprogram(name: "with_dbg_stack_depth", scope: !1, file: !1, line: 210, type: !4, scopeLine: 210, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!11 = !DILocation(line: 210, scope: !10) ;. ; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C89, file: [[META1:![0-9]+]], isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, nameTableKind: None) @@ -76,3 +162,9 @@ entry: ; CHECK: [[DBG6]] = !DILocation(line: 192, scope: [[DBG3]]) ; CHECK: [[DBG7]] = !DILocation(line: 0, scope: [[DBG3]]) ;. +; CHECK-STACK-CALLBACK: [[DBG8]] = distinct !DISubprogram(name: "with_dbg_stack_callback", scope: {{.*}}, file: {{.*}}, line: 200 +; CHECK-STACK-CALLBACK: [[DBG9]] = !DILocation(line: 200, scope: [[DBG8]]) +;. +; CHECK-STACK-DEPTH: [[DBG10]] = distinct !DISubprogram(name: "with_dbg_stack_depth", scope: {{.*}}, file: {{.*}}, line: 210 +; CHECK-STACK-DEPTH: [[DBG11]] = !DILocation(line: 210, scope: [[DBG10]]) +;. diff --git a/llvm/test/Transforms/Coroutines/coro-transform-must-elide.ll b/llvm/test/Transforms/Coroutines/coro-elide-safe.ll index 4eec7ed..722693d 100644 --- a/llvm/test/Transforms/Coroutines/coro-transform-must-elide.ll +++ b/llvm/test/Transforms/Coroutines/coro-elide-safe.ll @@ -1,4 +1,8 @@ -; Testing elide performed its job for calls to coroutines marked safe. +; Coroutine calls marked with `coro_elide_safe` should be elided. +; Inside `caller`, we expect the `callee` coroutine to be elided. +; Inside `caller_conditional`, `callee` is only called on an unlikely +; path, hence we expect the `callee` coroutine NOT to be elided. +; ; RUN: opt < %s -S -passes='cgscc(coro-annotation-elide)' | FileCheck %s %struct.Task = type { ptr } @@ -57,7 +61,7 @@ define ptr @callee.noalloc(i8 %arg, ptr dereferenceable(32) align(8) %frame) { ; Function Attrs: presplitcoroutine define ptr @caller() #0 { entry: - %task = call ptr @callee(i8 0) #1 + %task = call ptr @callee(i8 0) coro_elide_safe ret ptr %task ; CHECK: %[[TASK:.+]] = alloca %struct.Task, align 8 ; CHECK-NEXT: %[[FRAME:.+]] = alloca [32 x i8], align 8 @@ -69,6 +73,25 @@ entry: ; CHECK-NEXT: ret ptr %[[TASK]] } +; CHECK-LABEL: define ptr @caller_conditional(i1 %cond) +; Function Attrs: presplitcoroutine +define ptr @caller_conditional(i1 %cond) #0 { +entry: + br i1 %cond, label %call, label %ret + +call: + ; CHECK-NOT: alloca + ; CHECK-NOT: @llvm.coro.id({{.*}}, ptr @callee, {{.*}}) + ; CHECK: %task = call ptr @callee(i8 0) + ; CHECK-NEXT: br label %ret + %task = call ptr @callee(i8 0) coro_elide_safe + br label %ret + +ret: + %retval = phi ptr [ %task, %call ], [ null, %entry ] + ret ptr %retval +} + declare token @llvm.coro.id(i32, ptr, ptr, ptr) declare ptr @llvm.coro.begin(token, ptr) declare ptr @llvm.coro.frame() @@ -76,4 +99,3 @@ declare ptr @llvm.coro.subfn.addr(ptr, i8) declare i1 @llvm.coro.alloc(token) attributes #0 = { presplitcoroutine } -attributes #1 = { coro_elide_safe } diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll index 4173c32..f45798b 100644 --- a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll +++ b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll @@ -7,10 +7,10 @@ ; state, and the block that determines the next state. ; < path of BBs that form a cycle > [ state, determinator ] define i32 @test1(i32 %num) !prof !0{ -; CHECK: < case2 for.inc for.body > [ 1, for.inc ] -; CHECK-NEXT: < for.inc for.body > [ 1, for.inc ] -; CHECK-NEXT: < case1 for.inc for.body > [ 2, for.inc ] -; CHECK-NEXT: < case2 sel.si.unfold.false for.inc for.body > [ 2, sel.si.unfold.false ] +; CHECK: < case2, for.inc, for.body > [ 1, for.inc ] +; CHECK-NEXT: < for.inc, for.body > [ 1, for.inc ] +; CHECK-NEXT: < case1, for.inc, for.body > [ 2, for.inc ] +; CHECK-NEXT: < case2, sel.si.unfold.false, for.inc, for.body > [ 2, sel.si.unfold.false ] entry: br label %for.body @@ -47,12 +47,12 @@ for.end: ; complicated CFG. Here the FSM is represented as a nested loop, with ; fallthrough cases. define i32 @test2(i32 %init) { -; CHECK: < loop.1.backedge loop.1 loop.2 loop.3 > [ 1, loop.1 ] -; CHECK-NEXT: < case4 loop.1.backedge state.1.be2.si.unfold.false loop.1 loop.2 loop.3 > [ 2, loop.1.backedge ] -; CHECK-NEXT: < case2 loop.1.backedge state.1.be2.si.unfold.false loop.1 loop.2 loop.3 > [ 4, loop.1.backedge ] -; CHECK-NEXT: < case4 loop.2.backedge loop.2 loop.3 > [ 3, loop.2.backedge ] -; CHECK-NEXT: < case3 loop.2.backedge loop.2 loop.3 > [ 0, loop.2.backedge ] -; CHECK-NEXT: < case2 loop.3 > [ 3, loop.3 ] +; CHECK: < loop.1.backedge, loop.1, loop.2, loop.3 > [ 1, loop.1 ] +; CHECK-NEXT: < case4, loop.1.backedge, state.1.be2.si.unfold.false, loop.1, loop.2, loop.3 > [ 2, loop.1.backedge ] +; CHECK-NEXT: < case2, loop.1.backedge, state.1.be2.si.unfold.false, loop.1, loop.2, loop.3 > [ 4, loop.1.backedge ] +; CHECK-NEXT: < case4, loop.2.backedge, loop.2, loop.3 > [ 3, loop.2.backedge ] +; CHECK-NEXT: < case3, loop.2.backedge, loop.2, loop.3 > [ 0, loop.2.backedge ] +; CHECK-NEXT: < case2, loop.3 > [ 3, loop.3 ] entry: %cmp = icmp eq i32 %init, 0 %sel = select i1 %cmp, i32 0, i32 2 @@ -187,12 +187,12 @@ bb66: ; preds = %bb59 ; Value %init is not predictable but it's okay since it is the value initial to the switch. define i32 @initial.value.positive1(i32 %init) !prof !0 { -; CHECK: < loop.1.backedge loop.1 loop.2 loop.3 > [ 1, loop.1 ] -; CHECK-NEXT: < case4 loop.1.backedge state.1.be2.si.unfold.false loop.1 loop.2 loop.3 > [ 2, loop.1.backedge ] -; CHECK-NEXT: < case2 loop.1.backedge state.1.be2.si.unfold.false loop.1 loop.2 loop.3 > [ 4, loop.1.backedge ] -; CHECK-NEXT: < case4 loop.2.backedge loop.2 loop.3 > [ 3, loop.2.backedge ] -; CHECK-NEXT: < case3 loop.2.backedge loop.2 loop.3 > [ 0, loop.2.backedge ] -; CHECK-NEXT: < case2 loop.3 > [ 3, loop.3 ] +; CHECK: < loop.1.backedge, loop.1, loop.2, loop.3 > [ 1, loop.1 ] +; CHECK-NEXT: < case4, loop.1.backedge, state.1.be2.si.unfold.false, loop.1, loop.2, loop.3 > [ 2, loop.1.backedge ] +; CHECK-NEXT: < case2, loop.1.backedge, state.1.be2.si.unfold.false, loop.1, loop.2, loop.3 > [ 4, loop.1.backedge ] +; CHECK-NEXT: < case4, loop.2.backedge, loop.2, loop.3 > [ 3, loop.2.backedge ] +; CHECK-NEXT: < case3, loop.2.backedge, loop.2, loop.3 > [ 0, loop.2.backedge ] +; CHECK-NEXT: < case2, loop.3 > [ 3, loop.3 ] entry: %cmp = icmp eq i32 %init, 0 br label %loop.1 diff --git a/llvm/test/Transforms/DFAJumpThreading/max-path-length.ll b/llvm/test/Transforms/DFAJumpThreading/max-path-length.ll index 92747629..cb7c46e 100644 --- a/llvm/test/Transforms/DFAJumpThreading/max-path-length.ll +++ b/llvm/test/Transforms/DFAJumpThreading/max-path-length.ll @@ -9,9 +9,9 @@ ; too long so that it is not jump-threaded. define i32 @max_path_length(i32 %num) { ; CHECK-NOT: 3, case1 -; CHECK: < case2 for.inc for.body > [ 1, for.inc ] -; CHECK-NEXT: < for.inc for.body > [ 1, for.inc ] -; CHECK-NEXT: < case2 sel.si.unfold.false for.inc for.body > [ 2, sel.si.unfold.false ] +; CHECK: < case2, for.inc, for.body > [ 1, for.inc ] +; CHECK-NEXT: < for.inc, for.body > [ 1, for.inc ] +; CHECK-NEXT: < case2, sel.si.unfold.false, for.inc, for.body > [ 2, sel.si.unfold.false ] ; CHECK-NEXT: DFA-JT: Renaming non-local uses of: entry: br label %for.body diff --git a/llvm/test/Transforms/GVN/assume-equal.ll b/llvm/test/Transforms/GVN/assume-equal.ll index 0c922da..bbbc5c5 100644 --- a/llvm/test/Transforms/GVN/assume-equal.ll +++ b/llvm/test/Transforms/GVN/assume-equal.ll @@ -221,21 +221,22 @@ define i32 @_Z1ii(i32 %p) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[P]], 42 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: br i1 true, label %[[BB2:.*]], label %[[BB2]] -; CHECK: [[BB2]]: -; CHECK-NEXT: br i1 true, label %[[BB2]], label %[[BB2]] -; CHECK: [[BB0:.*:]] +; CHECK-NEXT: br i1 true, label %[[COMMON:.*]], label %[[COMMON]] +; CHECK: [[COMMON]]: +; CHECK-NEXT: br i1 true, label %[[COMMON]], label %[[COMMON]] +; CHECK: [[EXIT:.*:]] ; CHECK-NEXT: ret i32 42 ; entry: %cmp = icmp eq i32 %p, 42 call void @llvm.assume(i1 %cmp) - br i1 %cmp, label %bb2, label %bb2 -bb2: + br i1 %cmp, label %common, label %common +common: call void @llvm.assume(i1 true) - br i1 %cmp, label %bb2, label %bb2 + br i1 %cmp, label %common, label %common +exit: ret i32 %p } @@ -357,8 +358,8 @@ define i8 @assume_ptr_eq_different_prov_matters(ptr %p, ptr %p2) { ret i8 %v } -define i1 @assume_ptr_eq_different_prov_does_not_matter(ptr %p, ptr %p2) { -; CHECK-LABEL: define i1 @assume_ptr_eq_different_prov_does_not_matter( +define i1 @assume_ptr_eq_different_prov_does_not_matter_icmp(ptr %p, ptr %p2) { +; CHECK-LABEL: define i1 @assume_ptr_eq_different_prov_does_not_matter_icmp( ; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]]) { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[P]], [[P2]] ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) @@ -371,6 +372,36 @@ define i1 @assume_ptr_eq_different_prov_does_not_matter(ptr %p, ptr %p2) { ret i1 %c } +; This is not correct, as it may change the provenance exposed by ptrtoint. +; We still allow it for now. +define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoint(ptr %p, ptr %p2) { +; CHECK-LABEL: define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoint( +; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[P]], [[P2]] +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[INT:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: ret i64 [[INT]] +; + %cmp = icmp eq ptr %p, %p2 + call void @llvm.assume(i1 %cmp) + %int = ptrtoint ptr %p2 to i64 + ret i64 %int +} + +define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoaddr(ptr %p, ptr %p2) { +; CHECK-LABEL: define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoaddr( +; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[P]], [[P2]] +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[INT:%.*]] = ptrtoaddr ptr [[P]] to i64 +; CHECK-NEXT: ret i64 [[INT]] +; + %cmp = icmp eq ptr %p, %p2 + call void @llvm.assume(i1 %cmp) + %int = ptrtoaddr ptr %p2 to i64 + ret i64 %int +} + define i8 @assume_ptr_eq_same_prov(ptr %p, i64 %x) { ; CHECK-LABEL: define i8 @assume_ptr_eq_same_prov( ; CHECK-SAME: ptr [[P:%.*]], i64 [[X:%.*]]) { diff --git a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll index 61b1331..5211fbd 100644 --- a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll +++ b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll @@ -1,6 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s -target datalayout = "p1:64:64:64:32" + +; The ptrtoaddr folds are also valid for pointers that have external state. +target datalayout = "pe1:64:64:64:32" + +@g = external global i8 +@g2 = external global i8 + +@g.as1 = external addrspace(1) global i8 +@g2.as1 = external addrspace(1) global i8 define i32 @ptrtoaddr_inttoptr_arg(i32 %a) { ; CHECK-LABEL: define i32 @ptrtoaddr_inttoptr_arg( @@ -24,14 +32,14 @@ define i32 @ptrtoaddr_inttoptr() { define i32 @ptrtoaddr_inttoptr_diff_size1() { ; CHECK-LABEL: define i32 @ptrtoaddr_inttoptr_diff_size1() { -; CHECK-NEXT: ret i32 ptrtoaddr (ptr addrspace(1) inttoptr (i64 -1 to ptr addrspace(1)) to i32) +; CHECK-NEXT: ret i32 -1 ; ret i32 ptrtoaddr (ptr addrspace(1) inttoptr (i64 -1 to ptr addrspace(1)) to i32) } define i32 @ptrtoaddr_inttoptr_diff_size2() { ; CHECK-LABEL: define i32 @ptrtoaddr_inttoptr_diff_size2() { -; CHECK-NEXT: ret i32 ptrtoaddr (ptr addrspace(1) inttoptr (i16 -1 to ptr addrspace(1)) to i32) +; CHECK-NEXT: ret i32 65535 ; ret i32 ptrtoaddr (ptr addrspace(1) inttoptr (i16 -1 to ptr addrspace(1)) to i32) } @@ -52,14 +60,73 @@ define i64 @ptr2addr2_inttoptr_noas2() { define i64 @ptrtoaddr_inttoptr_noas_diff_size1() { ; CHECK-LABEL: define i64 @ptrtoaddr_inttoptr_noas_diff_size1() { -; CHECK-NEXT: ret i64 ptrtoaddr (ptr inttoptr (i32 -1 to ptr) to i64) +; CHECK-NEXT: ret i64 4294967295 ; ret i64 ptrtoaddr (ptr inttoptr (i32 -1 to ptr) to i64) } define i64 @ptrtoaddr_inttoptr_noas_diff_size2() { ; CHECK-LABEL: define i64 @ptrtoaddr_inttoptr_noas_diff_size2() { -; CHECK-NEXT: ret i64 ptrtoaddr (ptr inttoptr (i128 -1 to ptr) to i64) +; CHECK-NEXT: ret i64 -1 ; ret i64 ptrtoaddr (ptr inttoptr (i128 -1 to ptr) to i64) } + +define i64 @ptrtoaddr_gep_null() { +; CHECK-LABEL: define i64 @ptrtoaddr_gep_null() { +; CHECK-NEXT: ret i64 42 +; + ret i64 ptrtoaddr (ptr getelementptr (i8, ptr null, i64 42) to i64) +} + +define i32 @ptrtoaddr_gep_null_addrsize() { +; CHECK-LABEL: define i32 @ptrtoaddr_gep_null_addrsize() { +; CHECK-NEXT: ret i32 42 +; + ret i32 ptrtoaddr (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) null, i32 42) to i32) +} + +define i64 @ptrtoaddr_gep_sub() { +; CHECK-LABEL: define i64 @ptrtoaddr_gep_sub() { +; CHECK-NEXT: ret i64 sub (i64 ptrtoaddr (ptr @g to i64), i64 ptrtoaddr (ptr @g2 to i64)) +; + ret i64 ptrtoaddr (ptr getelementptr (i8, ptr @g, i64 sub (i64 0, i64 ptrtoaddr (ptr @g2 to i64))) to i64) +} + +define i32 @ptrtoaddr_gep_sub_addrsize() { +; CHECK-LABEL: define i32 @ptrtoaddr_gep_sub_addrsize() { +; CHECK-NEXT: ret i32 sub (i32 ptrtoaddr (ptr addrspace(1) @g.as1 to i32), i32 ptrtoaddr (ptr addrspace(1) @g2.as1 to i32)) +; + ret i32 ptrtoaddr (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) @g.as1, i32 sub (i32 0, i32 ptrtoaddr (ptr addrspace(1) @g2.as1 to i32))) to i32) +} + +; Don't fold inttoptr of ptrtoaddr away. inttoptr will pick a previously +; exposed provenance, which is not necessarily that of @g (especially as +; ptrtoaddr does not expose the provenance.) +define ptr @inttoptr_of_ptrtoaddr() { +; CHECK-LABEL: define ptr @inttoptr_of_ptrtoaddr() { +; CHECK-NEXT: ret ptr inttoptr (i64 ptrtoaddr (ptr @g to i64) to ptr) +; + ret ptr inttoptr (i64 ptrtoaddr (ptr @g to i64) to ptr) +} + +define i64 @ptrtoaddr_sub_consts_unrelated() { +; CHECK-LABEL: define i64 @ptrtoaddr_sub_consts_unrelated() { +; CHECK-NEXT: ret i64 sub (i64 ptrtoaddr (ptr @g to i64), i64 ptrtoaddr (ptr @g2 to i64)) +; + ret i64 sub (i64 ptrtoaddr (ptr @g to i64), i64 ptrtoaddr (ptr @g2 to i64)) +} + +define i64 @ptrtoaddr_sub_consts_offset() { +; CHECK-LABEL: define i64 @ptrtoaddr_sub_consts_offset() { +; CHECK-NEXT: ret i64 42 +; + ret i64 sub (i64 ptrtoaddr (ptr getelementptr (i8, ptr @g, i64 42) to i64), i64 ptrtoaddr (ptr @g to i64)) +} + +define i32 @ptrtoaddr_sub_consts_offset_addrsize() { +; CHECK-LABEL: define i32 @ptrtoaddr_sub_consts_offset_addrsize() { +; CHECK-NEXT: ret i32 42 +; + ret i32 sub (i32 ptrtoaddr (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) @g.as1, i32 42) to i32), i32 ptrtoaddr (ptr addrspace(1) @g.as1 to i32)) +} diff --git a/llvm/test/Transforms/InstSimplify/ptr_diff.ll b/llvm/test/Transforms/InstSimplify/ptr_diff.ll index d18b462..fdd9e8e 100644 --- a/llvm/test/Transforms/InstSimplify/ptr_diff.ll +++ b/llvm/test/Transforms/InstSimplify/ptr_diff.ll @@ -1,11 +1,9 @@ -; NOTE: Assertions have been autogenerated by update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -passes=instsimplify -S | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" -define i64 @ptrdiff1(ptr %ptr) { -; CHECK-LABEL: @ptrdiff1( -; CHECK: ret i64 42 +define i64 @ptrdiff(ptr %ptr) { +; CHECK-LABEL: @ptrdiff( +; CHECK-NEXT: ret i64 42 ; %last = getelementptr inbounds i8, ptr %ptr, i32 42 %first.int = ptrtoint ptr %ptr to i64 @@ -14,9 +12,24 @@ define i64 @ptrdiff1(ptr %ptr) { ret i64 %diff } -define i64 @ptrdiff2(ptr %ptr) { -; CHECK-LABEL: @ptrdiff2( -; CHECK: ret i64 42 +define i64 @ptrdiff_no_inbounds(ptr %ptr) { +; CHECK-LABEL: @ptrdiff_no_inbounds( +; CHECK-NEXT: [[LAST:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i32 42 +; CHECK-NEXT: [[FIRST_INT:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[LAST_INT:%.*]] = ptrtoint ptr [[LAST]] to i64 +; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[LAST_INT]], [[FIRST_INT]] +; CHECK-NEXT: ret i64 [[DIFF]] +; + %last = getelementptr i8, ptr %ptr, i32 42 + %first.int = ptrtoint ptr %ptr to i64 + %last.int = ptrtoint ptr %last to i64 + %diff = sub i64 %last.int, %first.int + ret i64 %diff +} + +define i64 @ptrdiff_chain(ptr %ptr) { +; CHECK-LABEL: @ptrdiff_chain( +; CHECK-NEXT: ret i64 42 ; %first2 = getelementptr inbounds i8, ptr %ptr, i32 1 %first3 = getelementptr inbounds i8, ptr %first2, i32 2 @@ -31,26 +44,10 @@ define i64 @ptrdiff2(ptr %ptr) { ret i64 %diff } -define i64 @ptrdiff3(ptr %ptr) { -; Don't bother with non-inbounds GEPs. -; CHECK-LABEL: @ptrdiff3( -; CHECK: [[LAST:%.*]] = getelementptr i8, ptr %ptr, i32 42 -; CHECK-NEXT: [[FIRST_INT:%.*]] = ptrtoint ptr %ptr to i64 -; CHECK-NEXT: [[LAST_INT:%.*]] = ptrtoint ptr [[LAST]] to i64 -; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[LAST_INT]], [[FIRST_INT]] -; CHECK-NEXT: ret i64 [[DIFF]] -; - %last = getelementptr i8, ptr %ptr, i32 42 - %first.int = ptrtoint ptr %ptr to i64 - %last.int = ptrtoint ptr %last to i64 - %diff = sub i64 %last.int, %first.int - ret i64 %diff -} - -define <4 x i32> @ptrdiff4(<4 x ptr> %arg) nounwind { ; Handle simple cases of vectors of pointers. -; CHECK-LABEL: @ptrdiff4( -; CHECK: ret <4 x i32> zeroinitializer +define <4 x i32> @ptrdiff_vectors(<4 x ptr> %arg) nounwind { +; CHECK-LABEL: @ptrdiff_vectors( +; CHECK-NEXT: ret <4 x i32> zeroinitializer ; %p1 = ptrtoint <4 x ptr> %arg to <4 x i32> %bc = bitcast <4 x ptr> %arg to <4 x ptr> @@ -63,9 +60,9 @@ define <4 x i32> @ptrdiff4(<4 x ptr> %arg) nounwind { @global = internal global %struct.ham zeroinitializer, align 4 -define i32 @ptrdiff5() nounwind { -; CHECK-LABEL: @ptrdiff5( -; CHECK: bb: +define i32 @ptrdiff_global() nounwind { +; CHECK-LABEL: @ptrdiff_global( +; CHECK-NEXT: bb: ; CHECK-NEXT: ret i32 0 ; bb: diff --git a/llvm/test/Transforms/LICM/vector-intrinsics.ll b/llvm/test/Transforms/LICM/vector-intrinsics.ll new file mode 100644 index 0000000..351773e --- /dev/null +++ b/llvm/test/Transforms/LICM/vector-intrinsics.ll @@ -0,0 +1,176 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -passes='loop-mssa(licm)' -verify-memoryssa %s | FileCheck %s + +define i32 @reduce_umax(<2 x i32> %inv, i1 %c) { +; CHECK-LABEL: define i32 @reduce_umax( +; CHECK-SAME: <2 x i32> [[INV:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[REDUCE_UMAX:%.*]] = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> [[INV]]) +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[BACKEDGE_COND:%.*]] = icmp ult i32 [[IV]], [[REDUCE_UMAX]] +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C]], i1 [[BACKEDGE_COND]], i1 false +; CHECK-NEXT: br i1 [[OR_COND]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %cond.true ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %cond.true, label %exit + +cond.true: + %reduce.umax = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> %inv) + %backedge.cond = icmp ult i32 %iv, %reduce.umax + br i1 %backedge.cond, label %loop, label %exit + +exit: + ret i32 %iv +} + +define i32 @vp_umax(<2 x i32> %inv.l, <2 x i32> %inv.r, i1 %c) { +; CHECK-LABEL: define i32 @vp_umax( +; CHECK-SAME: <2 x i32> [[INV_L:%.*]], <2 x i32> [[INV_R:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[VP_UMAX:%.*]] = call <2 x i32> @llvm.vp.umax.v2i32(<2 x i32> [[INV_L]], <2 x i32> [[INV_R]], <2 x i1> splat (i1 true), i32 2) +; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x i32> [[VP_UMAX]], i32 0 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[BACKEDGE_COND:%.*]] = icmp ult i32 [[IV]], [[EXTRACT]] +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C]], i1 [[BACKEDGE_COND]], i1 false +; CHECK-NEXT: br i1 [[OR_COND]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %cond.true ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %cond.true, label %exit + +cond.true: + %vp.umax = call <2 x i32> @llvm.vp.umax.v2i32(<2 x i32> %inv.l, <2 x i32> %inv.r, <2 x i1> splat (i1 1), i32 2) + %extract = extractelement <2 x i32> %vp.umax, i32 0 + %backedge.cond = icmp ult i32 %iv, %extract + br i1 %backedge.cond, label %loop, label %exit + +exit: + ret i32 %iv +} + +define i32 @vp_udiv(<2 x i32> %inv.q, <2 x i32> %inv.d, i1 %c) { +; CHECK-LABEL: define i32 @vp_udiv( +; CHECK-SAME: <2 x i32> [[INV_Q:%.*]], <2 x i32> [[INV_D:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[COND_TRUE:.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: br i1 [[C]], label %[[COND_TRUE]], label %[[EXIT:.*]] +; CHECK: [[COND_TRUE]]: +; CHECK-NEXT: [[VP_UDIV:%.*]] = call <2 x i32> @llvm.vp.udiv.v2i32(<2 x i32> [[INV_Q]], <2 x i32> [[INV_D]], <2 x i1> splat (i1 true), i32 2) +; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x i32> [[VP_UDIV]], i32 0 +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV]], [[EXTRACT]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label %[[LOOP]], label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], %[[COND_TRUE]] ], [ [[IV]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %cond.true ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %cond.true, label %exit + +cond.true: + %vp.udiv = call <2 x i32> @llvm.vp.udiv.v2i32(<2 x i32> %inv.q, <2 x i32> %inv.d, <2 x i1> splat (i1 1), i32 2) + %extract = extractelement <2 x i32> %vp.udiv, i32 0 + %backedge.cond = icmp ult i32 %iv, %extract + br i1 %backedge.cond, label %loop, label %exit + +exit: + ret i32 %iv +} + +define i32 @vp_load(ptr %inv, i1 %c) { +; CHECK-LABEL: define i32 @vp_load( +; CHECK-SAME: ptr [[INV:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[COND_TRUE:.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: br i1 [[C]], label %[[COND_TRUE]], label %[[EXIT:.*]] +; CHECK: [[COND_TRUE]]: +; CHECK-NEXT: [[VP_LOAD:%.*]] = call <2 x i32> @llvm.vp.load.v2i32.p0(ptr [[INV]], <2 x i1> splat (i1 true), i32 2) +; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x i32> [[VP_LOAD]], i32 0 +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV]], [[EXTRACT]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label %[[LOOP]], label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], %[[COND_TRUE]] ], [ [[IV]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %cond.true ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %cond.true, label %exit + +cond.true: + %vp.load = call <2 x i32> @llvm.vp.load.v2i32(ptr %inv, <2 x i1> splat (i1 1), i32 2) + %extract = extractelement <2 x i32> %vp.load, i32 0 + %backedge.cond = icmp ult i32 %iv, %extract + br i1 %backedge.cond, label %loop, label %exit + +exit: + ret i32 %iv +} + +define i32 @vp_store(<2 x i32> %inv.v, ptr %inv.p, i1 %c) { +; CHECK-LABEL: define i32 @vp_store( +; CHECK-SAME: <2 x i32> [[INV_V:%.*]], ptr [[INV_P:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[COND_TRUE:.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: br i1 [[C]], label %[[COND_TRUE]], label %[[EXIT:.*]] +; CHECK: [[COND_TRUE]]: +; CHECK-NEXT: call void @llvm.vp.store.v2i32.p0(<2 x i32> [[INV_V]], ptr [[INV_P]], <2 x i1> splat (i1 true), i32 2) +; CHECK-NEXT: [[BACKEDGE_COND:%.*]] = icmp ult i32 [[IV]], 10 +; CHECK-NEXT: br i1 [[BACKEDGE_COND]], label %[[LOOP]], label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], %[[COND_TRUE]] ], [ [[IV]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %cond.true ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %cond.true, label %exit + +cond.true: + call void @llvm.vp.store.v2i32(<2 x i32> %inv.v, ptr %inv.p, <2 x i1> splat (i1 1), i32 2) + %backedge.cond = icmp ult i32 %iv, 10 + br i1 %backedge.cond, label %loop, label %exit + +exit: + ret i32 %iv +} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/veclib-function-calls.ll b/llvm/test/Transforms/LoopVectorize/RISCV/veclib-function-calls.ll index d73900d..83b494a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/veclib-function-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/veclib-function-calls.ll @@ -2288,7 +2288,7 @@ define void @tgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { } ;. ; CHECK: attributes #[[ATTR0]] = { "target-features"="+v" } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: attributes #[[ATTR2]] = { "vector-function-abi-variant"="_ZGVrNxv_acos(Sleef_acosdx_u10rvvm2)" } ; CHECK: attributes #[[ATTR3]] = { "vector-function-abi-variant"="_ZGVrNxv_acosf(Sleef_acosfx_u10rvvm2)" } ; CHECK: attributes #[[ATTR4]] = { "vector-function-abi-variant"="_ZGVrNxv_acosh(Sleef_acoshdx_u10rvvm2)" } diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll index 3500c5c..4fd8d17 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll @@ -546,19 +546,50 @@ define i64 @loop_guards_needed_to_prove_deref_multiple(i32 %x, i1 %c, ptr derefe ; CHECK-NEXT: call void @llvm.assume(i1 [[PRE_2]]) ; CHECK-NEXT: [[N:%.*]] = add i32 [[SEL]], -1 ; CHECK-NEXT: [[N_EXT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SEL]], -2 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4 +; CHECK-NEXT: [[IV_NEXT:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[IV_NEXT]] +; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP6]], label [[VECTOR_EARLY_EXIT:%.*]], label [[LOOP_LATCH:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[IV_NEXT]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: vector.early.exit: +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: br label [[EXIT_LOOPEXIT]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT]], [[LOOP_LATCH]] ], [ 0, [[PH]] ] +; CHECK-NEXT: br label [[LOOP_HEADER1:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 0, [[PH]] ] -; CHECK-NEXT: [[GEP_SRC_I:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[IV_NEXT1:%.*]], [[LOOP_LATCH1:%.*]] ], [ [[IV]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[GEP_SRC_I:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC_I]], align 1 ; CHECK-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], 0 -; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_LOOPEXIT:%.*]], label [[LOOP_LATCH]] +; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_LOOPEXIT]], label [[LOOP_LATCH1]] ; CHECK: loop.latch: -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N_EXT]] -; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP_HEADER]] +; CHECK-NEXT: [[IV_NEXT1]] = add i64 [[IV1]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV1]], [[N_EXT]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP_HEADER1]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: exit.loopexit: -; CHECK-NEXT: [[RES_PH:%.*]] = phi i64 [ [[IV]], [[LOOP_HEADER]] ], [ 0, [[LOOP_LATCH]] ] +; CHECK-NEXT: [[RES_PH:%.*]] = phi i64 [ [[IV1]], [[LOOP_HEADER1]] ], [ 0, [[LOOP_LATCH1]] ], [ 0, [[LOOP_LATCH]] ], [ [[TMP10]], [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: [[RES:%.*]] = phi i64 [ -1, [[ENTRY:%.*]] ], [ -2, [[THEN]] ], [ [[RES_PH]], [[EXIT_LOOPEXIT]] ] @@ -609,4 +640,6 @@ exit: ; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META1]]} ; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]} ; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META2]], [[META1]]} +; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]} +; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META2]], [[META1]]} ;. diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll index 9acc6d6..09f583f 100644 --- a/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll +++ b/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll @@ -39,5 +39,4 @@ declare <4 x float> @llvm.exp.v4f32(<4 x float>) #0 declare <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float>) #0 ; CHECK: attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -; CHECK-NEXT: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) } attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } diff --git a/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s b/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s index da83c54..8348c97 100644 --- a/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s +++ b/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s @@ -1,10 +1,12 @@ REQUIRES: aarch64-registered-target -// Flakey on SVE buildbots, disabled pending invesgitation. +// This will sometimes fail with "Not all operands were initialized by the snippet generator for...". UNSUPPORTED: target={{.*}} RUN: llvm-exegesis -mtriple=aarch64 -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%t.obj --opcode-name=FMOVWSr --benchmark-phase=assemble-measured-code 2>&1 RUN: llvm-objdump -d %t.obj > %t.s RUN: FileCheck %s < %t.s +// Start matching after the printed file path, as that may contain something that looks like a mnemonic. +CHECK: Disassembly of section .text: CHECK-NOT: ld{{[1-4]}} CHECK-NOT: st{{[1-4]}} |