-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp                 | 26
-rw-r--r--  llvm/test/CodeGen/AMDGPU/lower-module-lds-all-indirect-accesses.ll  | 84
-rw-r--r--  llvm/test/CodeGen/AMDGPU/lower-module-lds-offsets.ll                |  1
-rw-r--r--  llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll | 16
-rw-r--r--  llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll             | 77
-rw-r--r--  llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll              |  8
6 files changed, 154 insertions, 58 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index 4db5d02..d5f9255 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -513,11 +513,15 @@ public:
     ArrayType *AllKernelsOffsetsType =
         ArrayType::get(KernelOffsetsType, NumberKernels);
+    Constant *Missing = PoisonValue::get(KernelOffsetsType);
     std::vector<Constant *> overallConstantExprElts(NumberKernels);
     for (size_t i = 0; i < NumberKernels; i++) {
-      LDSVariableReplacement Replacement = KernelToReplacement[kernels[i]];
-      overallConstantExprElts[i] = getAddressesOfVariablesInKernel(
-          Ctx, Variables, Replacement.LDSVarsToConstantGEP);
+      auto Replacement = KernelToReplacement.find(kernels[i]);
+      overallConstantExprElts[i] =
+          (Replacement == KernelToReplacement.end())
+              ? Missing
+              : getAddressesOfVariablesInKernel(
+                    Ctx, Variables, Replacement->second.LDSVarsToConstantGEP);
     }
     Constant *init =
@@ -911,6 +915,7 @@ public:
     // Create a struct for each kernel for the non-module-scope variables.
+    IRBuilder<> Builder(M.getContext());
     DenseMap<Function *, LDSVariableReplacement> KernelToReplacement;
     for (Function &Func : M.functions()) {
       if (Func.isDeclaration() || !isKernelLDS(&Func))
@@ -963,6 +968,14 @@ public:
       auto Replacement =
           createLDSVariableReplacement(M, VarName, KernelUsedVariables);
+      // If any indirect uses, create a direct use to ensure allocation
+      // TODO: Simpler to unconditionally mark used but that regresses
+      // codegen in test/CodeGen/AMDGPU/noclobber-barrier.ll
+      auto Accesses = LDSUsesInfo.indirect_access.find(&Func);
+      if ((Accesses != LDSUsesInfo.indirect_access.end()) &&
+          !Accesses->second.empty())
+        markUsedByKernel(Builder, &Func, Replacement.SGV);
+
       // remove preserves existing codegen
       removeLocalVarsFromUsedLists(M, KernelUsedVariables);
       KernelToReplacement[&Func] = Replacement;
@@ -1156,8 +1169,6 @@ public:
       DenseSet<GlobalVariable *> Vec;
       Vec.insert(GV);
-      // TODO: Looks like a latent bug, Replacement may not be marked
-      // UsedByKernel here
       replaceLDSVariablesWithStruct(M, Vec, Replacement, [](Use &U) {
        return isa<Instruction>(U.getUser());
      });
@@ -1172,11 +1183,6 @@ public:
     LLVMContext &Ctx = M.getContext();
     IRBuilder<> Builder(Ctx);
-    for (size_t i = 0; i < OrderedKernels.size(); i++) {
-      markUsedByKernel(Builder, OrderedKernels[i],
-                       KernelToReplacement[OrderedKernels[i]].SGV);
-    }
-
     // The order must be consistent between lookup table and accesses to
     // lookup table
     std::vector<GlobalVariable *> TableLookupVariablesOrdered(
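The key change in the C++ hunks above is that the per-kernel offset table is no longer built by indexing KernelToReplacement with operator[], which would default-construct an entry for a kernel that has no replacement struct; it now uses find() and emits a poison placeholder element for such kernels. Below is a minimal standalone sketch of that pattern, using plain STL containers and hypothetical names rather than the pass's DenseMap, PoisonValue, and constant GEP types:

// Standalone sketch of the find()-with-placeholder pattern (hypothetical
// example; the pass itself operates on DenseMap, PoisonValue and constants).
#include <iostream>
#include <map>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Kernels = {"kernel_0", "kernel_1", "kernel_2"};
  // Only some kernels have a replacement entry, mirroring kernels that
  // reach the lookup table without a static LDS struct of their own.
  std::map<std::string, std::string> KernelToReplacement = {
      {"kernel_0", "addresses-of-variables-in-kernel_0"},
      {"kernel_2", "addresses-of-variables-in-kernel_2"},
  };

  const std::string Missing = "<poison>"; // placeholder table row
  std::vector<std::string> Table(Kernels.size());
  for (size_t I = 0; I < Kernels.size(); I++) {
    // find() instead of operator[]: do not fabricate an entry for a kernel
    // that has none; emit the placeholder row for it instead.
    auto It = KernelToReplacement.find(Kernels[I]);
    Table[I] = (It == KernelToReplacement.end()) ? Missing : It->second;
  }

  for (const std::string &Row : Table)
    std::cout << Row << '\n';
  return 0;
}

The same tolerance for missing entries is why the patch also moves the markUsedByKernel call: it is now emitted per kernel with indirect LDS accesses instead of unconditionally for every kernel in the lookup-table order.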
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-all-indirect-accesses.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-all-indirect-accesses.ll
new file mode 100644
index 0000000..6ed7345
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-all-indirect-accesses.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=hybrid < %s | FileCheck %s
+
+;; Reduced from a larger test case. Checks that functions and kernels that use only dynamic LDS
+;; are lowered successfully. Previously they were only handled correctly if the kernel happened to
+;; also use static LDS variables, an artefact of implementing dynamic variables by adapting the existing code for static ones.
+
+@A = external addrspace(3) global [8 x ptr]
+@B = external addrspace(3) global [0 x i32]
+
+define amdgpu_kernel void @kernel_0() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_0() !llvm.amdgcn.lds.kernel.id !1 {
+; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kernel_0.lds) ]
+; CHECK-NEXT: call void @call_store_A()
+; CHECK-NEXT: ret void
+;
+ call void @call_store_A()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_1() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_1() !llvm.amdgcn.lds.kernel.id !2 {
+; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel_1.dynlds) ]
+; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_B_ptr()
+; CHECK-NEXT: ret void
+;
+ %ptr = call ptr @get_B_ptr()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_2() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_2() !llvm.amdgcn.lds.kernel.id !3 {
+; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kernel_2.lds) ]
+; CHECK-NEXT: call void @store_A()
+; CHECK-NEXT: ret void
+;
+ call void @store_A()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_3() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_3() !llvm.amdgcn.lds.kernel.id !4 {
+; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel_3.dynlds) ]
+; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_B_ptr()
+; CHECK-NEXT: ret void
+;
+ %ptr = call ptr @get_B_ptr()
+ ret void
+}
+
+define private void @call_store_A() {
+; CHECK-LABEL: define private void @call_store_A() {
+; CHECK-NEXT: call void @store_A()
+; CHECK-NEXT: ret void
+;
+ call void @store_A()
+ ret void
+}
+
+define private void @store_A() {
+; CHECK-LABEL: define private void @store_A() {
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [4 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[A]], align 4
+; CHECK-NEXT: [[A1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
+; CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[A1]] to ptr
+; CHECK-NEXT: store ptr [[TMP3]], ptr null, align 8
+; CHECK-NEXT: ret void
+;
+ store ptr addrspacecast (ptr addrspace(3) @A to ptr), ptr null
+ ret void
+}
+
+define private ptr @get_B_ptr() {
+; CHECK-LABEL: define private ptr @get_B_ptr() {
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds [4 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[B]], align 4
+; CHECK-NEXT: [[B1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
+; CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[B1]] to ptr
+; CHECK-NEXT: ret ptr [[TMP3]]
+;
+ ret ptr addrspacecast (ptr addrspace(3) @B to ptr)
+}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-offsets.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-offsets.ll
index e08bef6a..81b2174 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-offsets.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-offsets.ll
@@ -17,6 +17,7 @@
; GCN: ds_write_b8 [[NULL]], [[TWO]] offset:16
define amdgpu_kernel void @k0() {
; OPT-LABEL: @k0(
+; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds) ]
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
; OPT-NEXT: store i8 1, ptr addrspace(3) @llvm.amdgcn.module.lds, align 1
; OPT-NEXT: store i8 2, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 16
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll
index b3a0f94..007e777 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll
@@ -28,9 +28,9 @@ define amdgpu_kernel void @k0() {
@f0.lds = addrspace(3) global i16 undef
define void @f0() {
; MODULE-LABEL: @f0(
-; MODULE-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !alias.scope !1, !noalias !4
+; MODULE-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !alias.scope [[META1:![0-9]+]], !noalias [[META4:![0-9]+]]
; MODULE-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 3
-; MODULE-NEXT: store i16 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !alias.scope !1, !noalias !4
+; MODULE-NEXT: store i16 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !alias.scope [[META1]], !noalias [[META4]]
; MODULE-NEXT: ret void
;
; TABLE-LABEL: @f0(
@@ -60,7 +60,7 @@ define void @f0() {
define amdgpu_kernel void @k_f0() {
; MODULE-LABEL: @k_f0(
-; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !alias.scope !5, !noalias !1
+; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !alias.scope [[META5:![0-9]+]], !noalias [[META1]]
; MODULE-NEXT: call void @f0()
; MODULE-NEXT: ret void
;
@@ -70,6 +70,7 @@ define amdgpu_kernel void @k_f0() {
; TABLE-NEXT: ret void
;
; K_OR_HY-LABEL: @k_f0(
+; K_OR_HY-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k_f0.lds) ]
; K_OR_HY-NEXT: call void @f0()
; K_OR_HY-NEXT: ret void
;
@@ -82,9 +83,9 @@ define amdgpu_kernel void @k_f0() {
@both.lds = addrspace(3) global i32 undef
define void @f_both() {
; MODULE-LABEL: @f_both(
-; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !5, !noalias !4
+; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META4]]
; MODULE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 4
-; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !5, !noalias !4
+; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META4]]
; MODULE-NEXT: ret void
;
; TABLE-LABEL: @f_both(
@@ -115,9 +116,9 @@ define void @f_both() {
define amdgpu_kernel void @k0_both() {
; MODULE-LABEL: @k0_both(
; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
-; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !5, !noalias !1
+; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META1]]
; MODULE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 5
-; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !5, !noalias !1
+; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META1]]
; MODULE-NEXT: call void @f_both()
; MODULE-NEXT: ret void
;
@@ -130,6 +131,7 @@ define amdgpu_kernel void @k0_both() {
; TABLE-NEXT: ret void
;
; K_OR_HY-LABEL: @k0_both(
+; K_OR_HY-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k0_both.lds) ]
; K_OR_HY-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.kernel.k0_both.lds, align 4
; K_OR_HY-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 5
; K_OR_HY-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.kernel.k0_both.lds, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll
index 75211d5..0844b38 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll
@@ -19,12 +19,11 @@
; OPT: @llvm.amdgcn.kernel.k123.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k123.lds.t undef, align 8, !absolute_symbol !2
; OPT{LITERAL}: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [2 x [1 x i32]] [[1 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds to i32)], [1 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds to i32)]]
-;.
define void @f0() {
; OPT-LABEL: @f0(
-; OPT-NEXT: %ld = load float, ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds, align 4
-; OPT-NEXT: %mul = fmul float %ld, 2.000000e+00
-; OPT-NEXT: store float %mul, ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds, align 4
+; OPT-NEXT: [[LD:%.*]] = load float, ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds, align 4
+; OPT-NEXT: [[MUL:%.*]] = fmul float [[LD]], 2.000000e+00
+; OPT-NEXT: store float [[MUL]], ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds, align 4
; OPT-NEXT: ret void
;
; GCN-LABEL: f0:
@@ -46,9 +45,9 @@ define void @f0() {
define void @f1() {
; OPT-LABEL: @f1(
-; OPT-NEXT: %ld = load i16, ptr addrspace(3) @llvm.amdgcn.module.lds, align 16
-; OPT-NEXT: %mul = mul i16 %ld, 3
-; OPT-NEXT: store i16 %mul, ptr addrspace(3) @llvm.amdgcn.module.lds, align 16
+; OPT-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) @llvm.amdgcn.module.lds, align 16
+; OPT-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 3
+; OPT-NEXT: store i16 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 16
; OPT-NEXT: ret void
;
; GCN-LABEL: f1:
@@ -70,16 +69,16 @@ define void @f1() {
define void @f2() {
; OPT-LABEL: @f2(
-; OPT-NEXT: %1 = call i32 @llvm.amdgcn.lds.kernel.id()
-; OPT-NEXT: %v22 = getelementptr inbounds [2 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 %1, i32 0
-; OPT-NEXT: %2 = load i32, ptr addrspace(4) %v22, align 4
-; OPT-NEXT: %v23 = inttoptr i32 %2 to ptr addrspace(3)
-; OPT-NEXT: %ld = load i64, ptr addrspace(3) %v23, align 4
-; OPT-NEXT: %mul = mul i64 %ld, 4
-; OPT-NEXT: %v2 = getelementptr inbounds [2 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 %1, i32 0
-; OPT-NEXT: %3 = load i32, ptr addrspace(4) %v2, align 4
-; OPT-NEXT: %v21 = inttoptr i32 %3 to ptr addrspace(3)
-; OPT-NEXT: store i64 %mul, ptr addrspace(3) %v21, align 4
+; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; OPT-NEXT: [[V22:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
+; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V22]], align 4
+; OPT-NEXT: [[V23:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
+; OPT-NEXT: [[LD:%.*]] = load i64, ptr addrspace(3) [[V23]], align 4
+; OPT-NEXT: [[MUL:%.*]] = mul i64 [[LD]], 4
+; OPT-NEXT: [[V2:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
+; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V2]], align 4
+; OPT-NEXT: [[V21:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
+; OPT-NEXT: store i64 [[MUL]], ptr addrspace(3) [[V21]], align 4
; OPT-NEXT: ret void
;
; GCN-LABEL: f2:
@@ -111,9 +110,9 @@ define void @f2() {
define void @f3() {
; OPT-LABEL: @f3(
-; OPT-NEXT: %ld = load i8, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k23.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds, i32 0, i32 1), align 8
-; OPT-NEXT: %mul = mul i8 %ld, 5
-; OPT-NEXT: store i8 %mul, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k23.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds, i32 0, i32 1), align 8
+; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K23_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds, i32 0, i32 1), align 8
+; OPT-NEXT: [[MUL:%.*]] = mul i8 [[LD]], 5
+; OPT-NEXT: store i8 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K23_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds, i32 0, i32 1), align 8
; OPT-NEXT: ret void
;
; GCN-LABEL: f3:
@@ -136,9 +135,9 @@ define void @f3() {
; Doesn't access any via a function, won't be in the lookup table
define amdgpu_kernel void @kernel_no_table() {
; OPT-LABEL: @kernel_no_table(
-; OPT-NEXT: %ld = load i64, ptr addrspace(3) @llvm.amdgcn.kernel.kernel_no_table.lds, align 8
-; OPT-NEXT: %mul = mul i64 %ld, 8
-; OPT-NEXT: store i64 %mul, ptr addrspace(3) @llvm.amdgcn.kernel.kernel_no_table.lds, align 8
+; OPT-NEXT: [[LD:%.*]] = load i64, ptr addrspace(3) @llvm.amdgcn.kernel.kernel_no_table.lds, align 8
+; OPT-NEXT: [[MUL:%.*]] = mul i64 [[LD]], 8
+; OPT-NEXT: store i64 [[MUL]], ptr addrspace(3) @llvm.amdgcn.kernel.kernel_no_table.lds, align 8
; OPT-NEXT: ret void
;
; GCN-LABEL: kernel_no_table:
@@ -159,6 +158,7 @@ define amdgpu_kernel void @kernel_no_table() {
; Access two variables, will allocate those two
define amdgpu_kernel void @k01() {
; OPT-LABEL: @k01(
+; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds) ]
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
; OPT-NEXT: call void @f0()
; OPT-NEXT: call void @f1()
@@ -193,7 +193,7 @@ define amdgpu_kernel void @k01() {
define amdgpu_kernel void @k23() {
; OPT-LABEL: @k23(
-; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds) ]
+; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds) ], !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
; OPT-NEXT: call void @f2()
; OPT-NEXT: call void @f3()
; OPT-NEXT: ret void
@@ -231,12 +231,12 @@ define amdgpu_kernel void @k23() {
; Access and allocate three variables
define amdgpu_kernel void @k123() {
; OPT-LABEL: @k123(
-; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds) ]
+; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds) ], !alias.scope [[META10:![0-9]+]], !noalias [[META13:![0-9]+]]
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
; OPT-NEXT: call void @f1()
-; OPT-NEXT: %ld = load i8, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope !5, !noalias !8
-; OPT-NEXT: %mul = mul i8 %ld, 8
-; OPT-NEXT: store i8 %mul, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope !5, !noalias !8
+; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope [[META13]], !noalias [[META10]]
+; OPT-NEXT: [[MUL:%.*]] = mul i8 [[LD]], 8
+; OPT-NEXT: store i8 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope [[META13]], !noalias [[META10]]
; OPT-NEXT: call void @f2()
; OPT-NEXT: ret void
;
@@ -284,22 +284,25 @@ define amdgpu_kernel void @k123() {
!2 = !{i32 1}
-;.
; OPT: attributes #0 = { "amdgpu-elide-module-lds" }
; OPT: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) }
; OPT: attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-;.
+
; OPT: !0 = !{i64 0, i64 1}
; OPT: !1 = !{i64 4, i64 5}
; OPT: !2 = !{i64 8, i64 9}
; OPT: !3 = !{i32 1}
-; OPT: !4 = !{i32 0}
-; OPT: !5 = !{!6}
-; OPT: !6 = distinct !{!6, !7}
-; OPT: !7 = distinct !{!7}
-; OPT: !8 = !{!9}
-; OPT: !9 = distinct !{!9, !7}
-;.
+; OPT: !4 = !{!5}
+; OPT: !5 = distinct !{!5, !6}
+; OPT: !6 = distinct !{!6}
+; OPT: !7 = !{!8}
+; OPT: !8 = distinct !{!8, !6}
+; OPT: !9 = !{i32 0}
+; OPT: !10 = !{!11}
+; OPT: !11 = distinct !{!11, !12}
+; OPT: !12 = distinct !{!12}
+; OPT: !13 = !{!14}
+; OPT: !14 = distinct !{!14, !12}
; Table size length number-kernels * number-variables * sizeof(uint16_t)
; GCN: .type llvm.amdgcn.lds.offset.table,@object
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll
index 5b7da29..930b588 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll
@@ -256,7 +256,7 @@ define amdgpu_kernel void @k01() {
}
define amdgpu_kernel void @k23() {
-; OPT-LABEL: @k23() !llvm.amdgcn.lds.kernel.id !2 {
+; OPT-LABEL: @k23() !llvm.amdgcn.lds.kernel.id !7 {
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds) ]
; OPT-NEXT: call void @f2()
; OPT-NEXT: call void @f3()
@@ -295,12 +295,12 @@ define amdgpu_kernel void @k23() {
; Access and allocate three variables
define amdgpu_kernel void @k123() {
-; OPT-LABEL: @k123() !llvm.amdgcn.lds.kernel.id !3 {
+; OPT-LABEL: @k123() !llvm.amdgcn.lds.kernel.id !13 {
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds) ]
; OPT-NEXT: call void @f1()
-; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope !4, !noalias !7
+; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope !20, !noalias !21
; OPT-NEXT: [[MUL:%.*]] = mul i8 [[LD]], 8
-; OPT-NEXT: store i8 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope !4, !noalias !7
+; OPT-NEXT: store i8 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope !20, !noalias !21
; OPT-NEXT: call void @f2()
; OPT-NEXT: ret void
;