Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/aa-inreg-inference.ll')
-rw-r--r--  llvm/test/CodeGen/AMDGPU/aa-inreg-inference.ll | 296
1 file changed, 296 insertions(+), 0 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/aa-inreg-inference.ll b/llvm/test/CodeGen/AMDGPU/aa-inreg-inference.ll
new file mode 100644
index 0000000..5af2b82
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/aa-inreg-inference.ll
@@ -0,0 +1,296 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s -o - | FileCheck %s
+
+@g1 = protected addrspace(1) externally_initialized global i32 0, align 4
+@g2 = protected addrspace(1) externally_initialized global i32 0, align 4
+@g3 = protected addrspace(1) externally_initialized global i32 0, align 4
+@g4 = protected addrspace(1) externally_initialized global i32 0, align 4
+
+;.
+; CHECK: @g1 = protected addrspace(1) externally_initialized global i32 0, align 4
+; CHECK: @g2 = protected addrspace(1) externally_initialized global i32 0, align 4
+; CHECK: @g3 = protected addrspace(1) externally_initialized global i32 0, align 4
+; CHECK: @g4 = protected addrspace(1) externally_initialized global i32 0, align 4
+;.
+define internal fastcc void @callee_infer(ptr addrspace(1) %x, i32 %y) {
+; CHECK-LABEL: define {{[^@]+}}@callee_infer
+; CHECK-SAME: (ptr addrspace(1) inreg [[X:%.*]], i32 inreg [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X_VAL:%.*]] = load i32, ptr addrspace(1) [[X]], align 4
+; CHECK-NEXT:    store i32 [[X_VAL]], ptr addrspace(1) @g3, align 4
+; CHECK-NEXT:    store i32 [[Y]], ptr addrspace(1) @g4, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %x.val = load i32, ptr addrspace(1) %x, align 4
+  store i32 %x.val, ptr addrspace(1) @g3, align 4
+  store i32 %y, ptr addrspace(1) @g4, align 4
+  ret void
+}
+
+define amdgpu_kernel void @kernel_infer(ptr addrspace(1) %p1, ptr addrspace(1) %p2, i32 %x) {
+; CHECK-LABEL: define {{[^@]+}}@kernel_infer
+; CHECK-SAME: (ptr addrspace(1) [[P1:%.*]], ptr addrspace(1) [[P2:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[X]], 0
+; CHECK-NEXT:    [[P:%.*]] = select i1 [[CMP]], ptr addrspace(1) [[P1]], ptr addrspace(1) [[P2]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[X]])
+; CHECK-NEXT:    tail call fastcc void @callee_infer(ptr addrspace(1) @g1, i32 [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[X]])
+; CHECK-NEXT:    tail call fastcc void @callee_infer(ptr addrspace(1) @g2, i32 [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 1)
+; CHECK-NEXT:    tail call fastcc void @callee_infer(ptr addrspace(1) @g1, i32 [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 2)
+; CHECK-NEXT:    tail call fastcc void @callee_infer(ptr addrspace(1) @g2, i32 [[TMP3]])
+; CHECK-NEXT:    [[TMP4:%.*]] = call ptr addrspace(1) @llvm.amdgcn.readfirstlane.p1(ptr addrspace(1) [[P]])
+; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[X]])
+; CHECK-NEXT:    tail call fastcc void @callee_infer(ptr addrspace(1) [[TMP4]], i32 [[TMP5]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cmp = icmp sgt i32 %x, 0
+  %p = select i1 %cmp, ptr addrspace(1) %p1, ptr addrspace(1) %p2
+  tail call fastcc void @callee_infer(ptr addrspace(1) @g1, i32 %x)
+  tail call fastcc void @callee_infer(ptr addrspace(1) @g2, i32 %x)
+  tail call fastcc void @callee_infer(ptr addrspace(1) @g1, i32 1)
+  tail call fastcc void @callee_infer(ptr addrspace(1) @g2, i32 2)
+  tail call fastcc void @callee_infer(ptr addrspace(1) %p, i32 %x)
+  ret void
+}
+
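+; The same calls made through a function pointer: the attributor cannot see the
+; callee behind an indirect call site, so nothing is promoted to inreg and no
+; readfirstlane is inserted.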
+define amdgpu_kernel void @kernel_infer_indirect(ptr addrspace(1) %p1, ptr addrspace(1) %p2, i32 %x) {
+; CHECK-LABEL: define {{[^@]+}}@kernel_infer_indirect
+; CHECK-SAME: (ptr addrspace(1) [[P1:%.*]], ptr addrspace(1) [[P2:%.*]], i32 [[X:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[FN:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[X]], 0
+; CHECK-NEXT:    [[P:%.*]] = select i1 [[CMP]], ptr addrspace(1) [[P1]], ptr addrspace(1) [[P2]]
+; CHECK-NEXT:    store ptr @kernel_infer, ptr addrspace(5) [[FN]], align 8
+; CHECK-NEXT:    [[FN_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FN]] to ptr
+; CHECK-NEXT:    tail call fastcc void [[FN_CAST]](ptr addrspace(1) @g1, i32 [[X]])
+; CHECK-NEXT:    tail call fastcc void [[FN_CAST]](ptr addrspace(1) @g2, i32 [[X]])
+; CHECK-NEXT:    tail call fastcc void [[FN_CAST]](ptr addrspace(1) @g1, i32 1)
+; CHECK-NEXT:    tail call fastcc void [[FN_CAST]](ptr addrspace(1) @g2, i32 2)
+; CHECK-NEXT:    tail call fastcc void [[FN_CAST]](ptr addrspace(1) [[P]], i32 [[X]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %fn = alloca ptr, addrspace(5)
+  %cmp = icmp sgt i32 %x, 0
+  %p = select i1 %cmp, ptr addrspace(1) %p1, ptr addrspace(1) %p2
+  store ptr @kernel_infer, ptr addrspace(5) %fn
+  %fn.cast = addrspacecast ptr addrspace(5) %fn to ptr
+  tail call fastcc void %fn.cast(ptr addrspace(1) @g1, i32 %x)
+  tail call fastcc void %fn.cast(ptr addrspace(1) @g2, i32 %x)
+  tail call fastcc void %fn.cast(ptr addrspace(1) @g1, i32 1)
+  tail call fastcc void %fn.cast(ptr addrspace(1) @g2, i32 2)
+  tail call fastcc void %fn.cast(ptr addrspace(1) %p, i32 %x)
+  ret void
+}
+
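+; Negative case: the callee receives the divergent workitem id and a pointer
+; selected on a divergent condition, so neither argument can be made inreg.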
+define internal fastcc void @callee_not_infer(ptr addrspace(1) %x, i32 %y) {
+; CHECK-LABEL: define {{[^@]+}}@callee_not_infer
+; CHECK-SAME: (ptr addrspace(1) [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X_VAL:%.*]] = load i32, ptr addrspace(1) [[X]], align 4
+; CHECK-NEXT:    store i32 [[X_VAL]], ptr addrspace(1) @g3, align 4
+; CHECK-NEXT:    store i32 [[Y]], ptr addrspace(1) @g4, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %x.val = load i32, ptr addrspace(1) %x, align 4
+  store i32 %x.val, ptr addrspace(1) @g3, align 4
+  store i32 %y, ptr addrspace(1) @g4, align 4
+  ret void
+}
+
+define amdgpu_kernel void @kernel_not_infer(ptr addrspace(1) %q, ptr addrspace(1) %p1, ptr addrspace(1) %p2) {
+; CHECK-LABEL: define {{[^@]+}}@kernel_not_infer
+; CHECK-SAME: (ptr addrspace(1) [[Q:%.*]], ptr addrspace(1) [[P1:%.*]], ptr addrspace(1) [[P2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[Q]], i32 [[ID_X]]
+; CHECK-NEXT:    [[D:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[D]], 0
+; CHECK-NEXT:    [[P:%.*]] = select i1 [[CMP]], ptr addrspace(1) [[P1]], ptr addrspace(1) [[P2]]
+; CHECK-NEXT:    tail call fastcc void @callee_not_infer(ptr addrspace(1) [[Q]], i32 [[ID_X]])
+; CHECK-NEXT:    tail call fastcc void @callee_not_infer(ptr addrspace(1) [[P]], i32 [[ID_X]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+  %gep = getelementptr i32, ptr addrspace(1) %q, i32 %id.x
+  %d = load i32, ptr addrspace(1) %gep
+  %cmp = icmp sgt i32 %d, 0
+  %p = select i1 %cmp, ptr addrspace(1) %p1, ptr addrspace(1) %p2
+  tail call fastcc void @callee_not_infer(ptr addrspace(1) %q, i32 %id.x)
+  tail call fastcc void @callee_not_infer(ptr addrspace(1) %p, i32 %id.x)
+  ret void
+}
+
+define amdgpu_kernel void @kernel_not_infer_indirect(ptr addrspace(1) %q, ptr addrspace(1) %p1, ptr addrspace(1) %p2) {
+; CHECK-LABEL: define {{[^@]+}}@kernel_not_infer_indirect
+; CHECK-SAME: (ptr addrspace(1) [[Q:%.*]], ptr addrspace(1) [[P1:%.*]], ptr addrspace(1) [[P2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[FN:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT:    [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[Q]], i32 [[ID_X]]
+; CHECK-NEXT:    [[D:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[D]], 0
+; CHECK-NEXT:    [[P:%.*]] = select i1 [[CMP]], ptr addrspace(1) [[P1]], ptr addrspace(1) [[P2]]
+; CHECK-NEXT:    store ptr @kernel_not_infer, ptr addrspace(5) [[FN]], align 8
+; CHECK-NEXT:    [[FN_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FN]] to ptr
+; CHECK-NEXT:    tail call fastcc void [[FN_CAST]](ptr addrspace(1) [[Q]], i32 [[ID_X]])
+; CHECK-NEXT:    tail call fastcc void [[FN_CAST]](ptr addrspace(1) [[P]], i32 [[ID_X]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %fn = alloca ptr, addrspace(5)
+  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+  %gep = getelementptr i32, ptr addrspace(1) %q, i32 %id.x
+  %d = load i32, ptr addrspace(1) %gep
+  %cmp = icmp sgt i32 %d, 0
+  %p = select i1 %cmp, ptr addrspace(1) %p1, ptr addrspace(1) %p2
+  store ptr @kernel_not_infer, ptr addrspace(5) %fn
+  %fn.cast = addrspacecast ptr addrspace(5) %fn to ptr
+  tail call fastcc void %fn.cast(ptr addrspace(1) %q, i32 %id.x)
+  tail call fastcc void %fn.cast(ptr addrspace(1) %p, i32 %id.x)
+  ret void
+}
+
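+; The same negative cases repeated for an amdgpu_cs entry point: the divergent
+; operands again block the promotion.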
+define internal fastcc void @cs_callee_not_infer(ptr addrspace(1) %x, i32 %y) {
+; CHECK-LABEL: define {{[^@]+}}@cs_callee_not_infer
+; CHECK-SAME: (ptr addrspace(1) [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X_VAL:%.*]] = load i32, ptr addrspace(1) [[X]], align 4
+; CHECK-NEXT:    store i32 [[X_VAL]], ptr addrspace(1) @g3, align 4
+; CHECK-NEXT:    store i32 [[Y]], ptr addrspace(1) @g4, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %x.val = load i32, ptr addrspace(1) %x, align 4
+  store i32 %x.val, ptr addrspace(1) @g3, align 4
+  store i32 %y, ptr addrspace(1) @g4, align 4
+  ret void
+}
+
+define amdgpu_cs void @cs_kernel_not_infer(ptr addrspace(1) %q, ptr addrspace(1) %p1, ptr addrspace(1) %p2) {
+; CHECK-LABEL: define {{[^@]+}}@cs_kernel_not_infer
+; CHECK-SAME: (ptr addrspace(1) [[Q:%.*]], ptr addrspace(1) [[P1:%.*]], ptr addrspace(1) [[P2:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[Q]], i32 [[ID_X]]
+; CHECK-NEXT:    [[D:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[D]], 0
+; CHECK-NEXT:    [[P:%.*]] = select i1 [[CMP]], ptr addrspace(1) [[P1]], ptr addrspace(1) [[P2]]
+; CHECK-NEXT:    tail call fastcc void @cs_callee_not_infer(ptr addrspace(1) [[Q]], i32 [[ID_X]])
+; CHECK-NEXT:    tail call fastcc void @cs_callee_not_infer(ptr addrspace(1) [[P]], i32 [[ID_X]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+  %gep = getelementptr i32, ptr addrspace(1) %q, i32 %id.x
+  %d = load i32, ptr addrspace(1) %gep
+  %cmp = icmp sgt i32 %d, 0
+  %p = select i1 %cmp, ptr addrspace(1) %p1, ptr addrspace(1) %p2
+  tail call fastcc void @cs_callee_not_infer(ptr addrspace(1) %q, i32 %id.x)
+  tail call fastcc void @cs_callee_not_infer(ptr addrspace(1) %p, i32 %id.x)
+  ret void
+}
+
+define internal fastcc void @cs_callee_not_infer_indirect(ptr addrspace(1) %x, i32 %y) {
+; CHECK-LABEL: define {{[^@]+}}@cs_callee_not_infer_indirect
+; CHECK-SAME: (ptr addrspace(1) [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X_VAL:%.*]] = load i32, ptr addrspace(1) [[X]], align 4
+; CHECK-NEXT:    store i32 [[X_VAL]], ptr addrspace(1) @g3, align 4
+; CHECK-NEXT:    store i32 [[Y]], ptr addrspace(1) @g4, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %x.val = load i32, ptr addrspace(1) %x, align 4
+  store i32 %x.val, ptr addrspace(1) @g3, align 4
+  store i32 %y, ptr addrspace(1) @g4, align 4
+  ret void
+}
+
+
+define amdgpu_cs void @cs_kernel_not_infer_indirect(ptr addrspace(1) %q, ptr addrspace(1) %p1, ptr addrspace(1) %p2) {
+; CHECK-LABEL: define {{[^@]+}}@cs_kernel_not_infer_indirect
+; CHECK-SAME: (ptr addrspace(1) [[Q:%.*]], ptr addrspace(1) [[P1:%.*]], ptr addrspace(1) [[P2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[FN:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT:    [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[Q]], i32 [[ID_X]]
+; CHECK-NEXT:    [[D:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[D]], 0
+; CHECK-NEXT:    [[P:%.*]] = select i1 [[CMP]], ptr addrspace(1) [[P1]], ptr addrspace(1) [[P2]]
+; CHECK-NEXT:    store ptr @cs_callee_not_infer_indirect, ptr addrspace(5) [[FN]], align 8
+; CHECK-NEXT:    [[FN_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FN]] to ptr
+; CHECK-NEXT:    tail call fastcc void [[FN_CAST]](ptr addrspace(1) [[Q]], i32 [[ID_X]])
+; CHECK-NEXT:    tail call fastcc void [[FN_CAST]](ptr addrspace(1) [[P]], i32 [[ID_X]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %fn = alloca ptr, addrspace(5)
+  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+  %gep = getelementptr i32, ptr addrspace(1) %q, i32 %id.x
+  %d = load i32, ptr addrspace(1) %gep
+  %cmp = icmp sgt i32 %d, 0
+  %p = select i1 %cmp, ptr addrspace(1) %p1, ptr addrspace(1) %p2
+  store ptr @cs_callee_not_infer_indirect, ptr addrspace(5) %fn
+  %fn.cast = addrspacecast ptr addrspace(5) %fn to ptr
+  tail call fastcc void %fn.cast(ptr addrspace(1) %q, i32 %id.x)
+  tail call fastcc void %fn.cast(ptr addrspace(1) %p, i32 %id.x)
+  ret void
+}
+
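+; Arguments that are already marked inreg are accepted as-is, whether or not
+; the caller wraps the operand in an explicit readfirstlane.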
+define internal fastcc void @callee_with_inreg(ptr addrspace(1) inreg %x, i32 inreg %y) {
+; CHECK-LABEL: define {{[^@]+}}@callee_with_inreg
+; CHECK-SAME: (ptr addrspace(1) inreg [[X:%.*]], i32 inreg [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X_VAL:%.*]] = load i32, ptr addrspace(1) [[X]], align 4
+; CHECK-NEXT:    store i32 [[X_VAL]], ptr addrspace(1) @g3, align 4
+; CHECK-NEXT:    store i32 [[Y]], ptr addrspace(1) @g4, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %x.val = load i32, ptr addrspace(1) %x, align 4
+  store i32 %x.val, ptr addrspace(1) @g3, align 4
+  store i32 %y, ptr addrspace(1) @g4, align 4
+  ret void
+}
+
+define amdgpu_kernel void @kernel_without_readfirstlane(ptr addrspace(1) %p, i32 %x) {
+; CHECK-LABEL: define {{[^@]+}}@kernel_without_readfirstlane
+; CHECK-SAME: (ptr addrspace(1) [[P:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    tail call fastcc void @callee_with_inreg(ptr addrspace(1) [[P]], i32 [[X]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  tail call fastcc void @callee_with_inreg(ptr addrspace(1) %p, i32 %x)
+  ret void
+}
+
+define amdgpu_kernel void @kernel_with_readfirstlane(ptr addrspace(1) %p, i32 %x) {
+; CHECK-LABEL: define {{[^@]+}}@kernel_with_readfirstlane
+; CHECK-SAME: (ptr addrspace(1) [[P:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P0:%.*]] = call ptr addrspace(1) @llvm.amdgcn.readfirstlane.p1(ptr addrspace(1) [[P]])
+; CHECK-NEXT:    tail call fastcc void @callee_with_inreg(ptr addrspace(1) [[P0]], i32 [[X]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p0 = call ptr addrspace(1) @llvm.amdgcn.readfirstlane.p1(ptr addrspace(1) %p)
+  tail call fastcc void @callee_with_inreg(ptr addrspace(1) %p0, i32 %x)
+  ret void
+}
+
+;.
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+;.