; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=amdgpu-lower-kernel-arguments %s | FileCheck %s

; Regression test for a bug where addAliasScopeMetadata skipped
; memory-accessing calls with no pointer arguments, leaving them without
; !noalias metadata. This caused AA to conservatively report them as potential
; clobbers of noalias kernel arguments, blocking downstream scalarization
; in AMDGPUAnnotateUniformValues and causing severe performance regressions
; (e.g. in rocFFT).

; Callee for the first kernel. Attribute group #1 (defined at the end of this
; file) is `nounwind memory(read)`, and the declaration takes no parameters.
declare i32 @memory_read_no_ptr_args() #1

; The call reads memory but has no pointer arguments — it cannot alias
; any noalias kernel argument. The pass must add !noalias metadata to it.
; In the expected output the call's !noalias list names both argument scopes
; ("out" and "in").
define amdgpu_kernel void @call_without_ptr_args(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
; CHECK-LABEL: define amdgpu_kernel void @call_without_ptr_args(
; CHECK-SAME: ptr addrspace(1) noalias [[OUT:%.*]], ptr addrspace(1) noalias [[IN:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT:    [[CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; CHECK-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT]], i64 0
; CHECK-NEXT:    [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0:![0-9]+]]
; CHECK-NEXT:    [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT]], i64 8
; CHECK-NEXT:    [[IN_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 8, !invariant.load [[META0]]
; CHECK-NEXT:    [[VAL:%.*]] = call i32 @memory_read_no_ptr_args(), !noalias [[META1:![0-9]+]]
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[IN_LOAD]], i32 [[VAL]]
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META6:![0-9]+]]
; CHECK-NEXT:    store i32 [[LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4, !alias.scope [[META6]], !noalias [[META5]]
; CHECK-NEXT:    ret void
;
  ; Only the integer result of the call is used, as the element index into %in.
  %val = call i32 @memory_read_no_ptr_args()
  %gep = getelementptr i32, ptr addrspace(1) %in, i32 %val
  %load = load i32, ptr addrspace(1) %gep, align 4
  store i32 %load, ptr addrspace(1) %out, align 4
  ret void
}

; Same scenario but the call is readnone — should NOT get noalias metadata
; because it doesn't access memory at all and is skipped by the pass.
; Attribute group #2 on the callee is `nounwind memory(none)`. This kernel has
; a single noalias argument (%out); in the expected output the call carries no
; metadata and the store carries !alias.scope only.
declare i32 @readnone_no_ptr_args() #2

define amdgpu_kernel void @readnone_call_without_ptr_args(ptr addrspace(1) noalias %out) #0 {
; CHECK-LABEL: define amdgpu_kernel void @readnone_call_without_ptr_args(
; CHECK-SAME: ptr addrspace(1) noalias [[OUT:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:    [[READNONE_CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; CHECK-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[READNONE_CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT]], i64 0
; CHECK-NEXT:    [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0]]
; CHECK-NEXT:    [[VAL:%.*]] = call i32 @readnone_no_ptr_args()
; CHECK-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[OUT_LOAD]], align 4, !alias.scope [[META7:![0-9]+]]
; CHECK-NEXT:    ret void
;
  ; The call result is stored directly to %out.
  %val = call i32 @readnone_no_ptr_args()
  store i32 %val, ptr addrspace(1) %out, align 4
  ret void
}

; argmemonly variant: memory(argmem: read) with no pointer arguments.
; This function can only access memory through its pointer arguments, but
; has none — so it effectively cannot access memory at all. The pass must
; still add !noalias metadata since doesNotAccessMemory() returns false.
declare i32 @argmemonly_read_no_ptr_args() #3

; Kernel for the argmem-only case with a parameterless callee. Attribute group
; #3 on the callee (defined at the end of this file) is
; `nounwind memory(argmem: read)`. In the expected output the call receives a
; !noalias list naming both the "out" and "in" scopes, and the load/store pair
; is tagged with the scope of the argument each one accesses.
define amdgpu_kernel void @argmemonly_call_without_ptr_args(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
; CHECK-LABEL: define amdgpu_kernel void @argmemonly_call_without_ptr_args(
; CHECK-SAME: ptr addrspace(1) noalias [[OUT:%.*]], ptr addrspace(1) noalias [[IN:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:    [[ARGMEMONLY_CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; CHECK-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARGMEMONLY_CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT]], i64 0
; CHECK-NEXT:    [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0]]
; CHECK-NEXT:    [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARGMEMONLY_CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT]], i64 8
; CHECK-NEXT:    [[IN_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 8, !invariant.load [[META0]]
; CHECK-NEXT:    [[VAL:%.*]] = call i32 @argmemonly_read_no_ptr_args(), !noalias [[META10:![0-9]+]]
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[IN_LOAD]], i32 [[VAL]]
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4, !alias.scope [[META14:![0-9]+]], !noalias [[META15:![0-9]+]]
; CHECK-NEXT:    store i32 [[LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4, !alias.scope [[META15]], !noalias [[META14]]
; CHECK-NEXT:    ret void
;
  ; Only the integer result of the call is used, as the element index into %in.
  %val = call i32 @argmemonly_read_no_ptr_args()
  %gep = getelementptr i32, ptr addrspace(1) %in, i32 %val
  %load = load i32, ptr addrspace(1) %gep, align 4
  store i32 %load, ptr addrspace(1) %out, align 4
  ret void
}

; argmemonly with a pointer argument pointing to a noalias kernel arg —
; standard metadata path. The call accesses kernel arg memory through its
; pointer argument and gets both !alias.scope and !noalias as appropriate.
declare void @argmemonly_with_ptr_arg(ptr addrspace(1)) #4

; Kernel for the argmem-only case where the callee does receive a pointer.
; Attribute group #4 on the callee is `nounwind memory(argmem: readwrite)` and
; the call is passed %out. In the expected output the call is tagged exactly
; like the store to %out: !alias.scope lists the "out" scope and !noalias lists
; the "in" scope.
define amdgpu_kernel void @argmemonly_call_with_ptr_arg(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
; CHECK-LABEL: define amdgpu_kernel void @argmemonly_call_with_ptr_arg(
; CHECK-SAME: ptr addrspace(1) noalias [[OUT:%.*]], ptr addrspace(1) noalias [[IN:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:    [[ARGMEMONLY_CALL_WITH_PTR_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; CHECK-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARGMEMONLY_CALL_WITH_PTR_ARG_KERNARG_SEGMENT]], i64 0
; CHECK-NEXT:    [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0]]
; CHECK-NEXT:    [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARGMEMONLY_CALL_WITH_PTR_ARG_KERNARG_SEGMENT]], i64 8
; CHECK-NEXT:    [[IN_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 8, !invariant.load [[META0]]
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[IN_LOAD]], align 4, !alias.scope [[META16:![0-9]+]], !noalias [[META19:![0-9]+]]
; CHECK-NEXT:    call void @argmemonly_with_ptr_arg(ptr addrspace(1) [[OUT_LOAD]]), !alias.scope [[META19]], !noalias [[META16]]
; CHECK-NEXT:    store i32 [[LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4, !alias.scope [[META19]], !noalias [[META16]]
; CHECK-NEXT:    ret void
;
  ; %in is loaded before the call; the loaded value is stored to %out after it.
  %load = load i32, ptr addrspace(1) %in, align 4
  call void @argmemonly_with_ptr_arg(ptr addrspace(1) %out)
  store i32 %load, ptr addrspace(1) %out, align 4
  ret void
}

; Attribute groups: #0 is used by every kernel in this file; #1-#4 give the
; memory effects of the four declared callees.
attributes #0 = { nounwind }
attributes #1 = { nounwind memory(read) }
attributes #2 = { nounwind memory(none) }
attributes #3 = { nounwind memory(argmem: read) }
attributes #4 = { nounwind memory(argmem: readwrite) }
;.
; Expected metadata nodes. For each kernel there is one node named after the
; kernel, one node per noalias argument ("out" / "in") that refers to it, and
; the lists of those argument nodes that the instructions reference.
; CHECK: [[META0]] = !{}
; CHECK: [[META1]] = !{[[META2:![0-9]+]], [[META4:![0-9]+]]}
; CHECK: [[META2]] = distinct !{[[META2]], [[META3:![0-9]+]], !"out"}
; CHECK: [[META3]] = distinct !{[[META3]], !"call_without_ptr_args"}
; CHECK: [[META4]] = distinct !{[[META4]], [[META3]], !"in"}
; CHECK: [[META5]] = !{[[META4]]}
; CHECK: [[META6]] = !{[[META2]]}
; CHECK: [[META7]] = !{[[META8:![0-9]+]]}
; CHECK: [[META8]] = distinct !{[[META8]], [[META9:![0-9]+]], !"out"}
; CHECK: [[META9]] = distinct !{[[META9]], !"readnone_call_without_ptr_args"}
; CHECK: [[META10]] = !{[[META11:![0-9]+]], [[META13:![0-9]+]]}
; CHECK: [[META11]] = distinct !{[[META11]], [[META12:![0-9]+]], !"out"}
; CHECK: [[META12]] = distinct !{[[META12]], !"argmemonly_call_without_ptr_args"}
; CHECK: [[META13]] = distinct !{[[META13]], [[META12]], !"in"}
; CHECK: [[META14]] = !{[[META13]]}
; CHECK: [[META15]] = !{[[META11]]}
; CHECK: [[META16]] = !{[[META17:![0-9]+]]}
; CHECK: [[META17]] = distinct !{[[META17]], [[META18:![0-9]+]], !"in"}
; CHECK: [[META18]] = distinct !{[[META18]], !"argmemonly_call_with_ptr_arg"}
; CHECK: [[META19]] = !{[[META20:![0-9]+]]}
; CHECK: [[META20]] = distinct !{[[META20]], [[META18]], !"out"}
;.