; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-lower-module-lds -amdgpu-enable-object-linking < %s | FileCheck %s

; Exercises amdgpu-lower-module-lds with -amdgpu-enable-object-linking
; (link-time mode) on LDS globals -- addrspace(3) -- that have external
; linkage. The expectations in this file require every such variable to come
; out as a standalone `external` declaration and forbid any
; @__amdgpu_lds.<function> symbol, i.e. the variables are NOT wrapped into
; per-function structs.
;
; Scenarios listed as covered:
;   - Device function and kernel each using distinct global-scope LDS
;   - Multiple kernels sharing a device function that uses LDS
;   - A single function using multiple LDS variables
;   - Transitive call chain where only a leaf uses LDS
;   - Kernel directly using LDS (no device-function LDS user)
;
; NOTE(review): the metadata expectations in this file pin exactly five
; (function, variable) pairs, each naming a different function, so no function
; here references more than one LDS variable directly. The third scenario is
; at best covered transitively (a kernel plus the device function it calls) --
; confirm the intent or add a function that touches two variables.

; -- Global variables --
; Five LDS arrays, all with default (external) linkage and a poison
; initializer; each is referenced directly by exactly one function in this
; file.

@lds_shared = addrspace(3) global [64 x i32] poison, align 16
@lds_kernel_a = addrspace(3) global [32 x float] poison, align 4
@lds_kernel_b = addrspace(3) global [16 x i64] poison, align 8
@lds_leaf = addrspace(3) global [8 x i32] poison, align 4
@lds_direct = addrspace(3) global [32 x float] poison, align 4

; Body-less callee, called from several of the functions in this file.
declare void @extern_func()

; Every external-linkage LDS variable must be printed as an external
; declaration. Order among these five lines is not constrained.
; CHECK-DAG: @lds_shared = external addrspace(3) global [64 x i32]
; CHECK-DAG: @lds_kernel_a = external addrspace(3) global [32 x float]
; CHECK-DAG: @lds_kernel_b = external addrspace(3) global [16 x i64]
; CHECK-DAG: @lds_leaf = external addrspace(3) global [8 x i32]
; CHECK-DAG: @lds_direct = external addrspace(3) global [32 x float]

; No per-function struct may be created for any function.
; CHECK-NOT: @__amdgpu_lds.shared_func
; CHECK-NOT: @__amdgpu_lds.kernel_a
; CHECK-NOT: @__amdgpu_lds.kernel_b
; CHECK-NOT: @__amdgpu_lds.leaf_func
; CHECK-NOT: @__amdgpu_lds.mid_func
; CHECK-NOT: @__amdgpu_lds.direct_kernel

; --- shared_func ---
; Device function reached from both kernel_a and kernel_b; it must still
; address @lds_shared by name.
; CHECK-LABEL: define void @shared_func()
; CHECK: getelementptr [64 x i32], ptr addrspace(3) @lds_shared
; CHECK: call void @extern_func()

; --- kernel_a ---
; Kernel with its own LDS variable that also calls shared_func.
; CHECK-LABEL: define amdgpu_kernel void @kernel_a()
; CHECK: getelementptr [32 x float], ptr addrspace(3) @lds_kernel_a
; CHECK: call void @shared_func()

; --- kernel_b ---
; Second kernel with its own LDS variable that also calls shared_func.
; CHECK-LABEL: define amdgpu_kernel void @kernel_b()
; CHECK: getelementptr [16 x i64], ptr addrspace(3) @lds_kernel_b
; CHECK: call void @shared_func()

; --- leaf_func ---
; End of the transitive chain (entered through mid_func); the only function
; on that chain that touches LDS.
; CHECK-LABEL: define void @leaf_func()
; CHECK: getelementptr [8 x i32], ptr addrspace(3) @lds_leaf

; --- mid_func ---
; Uses no LDS itself: nothing named @__amdgpu_lds may appear before its two
; calls.
; CHECK-LABEL: define void @mid_func()
; CHECK-NOT: @__amdgpu_lds
; CHECK: call void @leaf_func()
; CHECK: call void @extern_func()

; --- transitive_kernel ---
; Reaches LDS only indirectly, by calling mid_func.
; CHECK-LABEL: define amdgpu_kernel void @transitive_kernel()
; CHECK: call void @mid_func()

; --- direct_kernel ---
; Kernel that references LDS itself; it calls no LDS-using device function.
; CHECK-LABEL: define amdgpu_kernel void @direct_kernel()
; CHECK: getelementptr [32 x float], ptr addrspace(3) @lds_direct

; Metadata: one entry per (function, variable) pair, for direct users only.
; The named node must have exactly five operands; the five pair nodes may be
; printed in any order.
; CHECK: !amdgpu.lds.uses = !{{{![0-9]+, ![0-9]+, ![0-9]+, ![0-9]+, ![0-9]+}}}
; CHECK-DAG: !{ptr @shared_func, ptr addrspace(3) @lds_shared}
; CHECK-DAG: !{ptr @kernel_a, ptr addrspace(3) @lds_kernel_a}
; CHECK-DAG: !{ptr @kernel_b, ptr addrspace(3) @lds_kernel_b}
; CHECK-DAG: !{ptr @leaf_func, ptr addrspace(3) @lds_leaf}
; CHECK-DAG: !{ptr @direct_kernel, ptr addrspace(3) @lds_direct}

; Device function: writes element 0 of @lds_shared, then calls @extern_func.
; Called by both @kernel_a and @kernel_b below.
define void @shared_func() {
  %shared.elt0 = getelementptr [64 x i32], ptr addrspace(3) @lds_shared, i32 0, i32 0
  store i32 1, ptr addrspace(3) %shared.elt0
  call void @extern_func()
  ret void
}

; Kernel: writes element 0 of its own @lds_kernel_a, then calls @shared_func.
define amdgpu_kernel void @kernel_a() {
  %a.elt0 = getelementptr [32 x float], ptr addrspace(3) @lds_kernel_a, i32 0, i32 0
  store float 1.0, ptr addrspace(3) %a.elt0
  call void @shared_func()
  ret void
}

; Kernel: writes element 0 of its own @lds_kernel_b, then calls @shared_func.
define amdgpu_kernel void @kernel_b() {
  %b.elt0 = getelementptr [16 x i64], ptr addrspace(3) @lds_kernel_b, i32 0, i32 0
  store i64 1, ptr addrspace(3) %b.elt0
  call void @shared_func()
  ret void
}

; Leaf of the transitive chain: writes element 0 of @lds_leaf and returns.
define void @leaf_func() {
  %leaf.elt0 = getelementptr [8 x i32], ptr addrspace(3) @lds_leaf, i32 0, i32 0
  store i32 42, ptr addrspace(3) %leaf.elt0
  ret void
}

; Intermediate function: references no LDS variable, only forwards to
; @leaf_func and @extern_func.
define void @mid_func() {
  call void @leaf_func()
  call void @extern_func()
  ret void
}

; Kernel whose only action is calling @mid_func, so any LDS use is two calls
; away (in @leaf_func).
define amdgpu_kernel void @transitive_kernel() {
  call void @mid_func()
  ret void
}

; Kernel: writes element 0 of @lds_direct itself, then calls @extern_func.
define amdgpu_kernel void @direct_kernel() {
  %direct.elt0 = getelementptr [32 x float], ptr addrspace(3) @lds_direct, i32 0, i32 0
  store float 1.0, ptr addrspace(3) %direct.elt0
  call void @extern_func()
  ret void
}