diff options
| author | Johannes Doerfert <johannes@jdoerfert.de> | 2023-10-21 12:05:59 -0700 |
|---|---|---|
| committer | Johannes Doerfert <johannes@jdoerfert.de> | 2023-10-21 12:31:06 -0700 |
| commit | ba87fba80632b2149de0276d3fd41601f8dcf5e0 (patch) | |
| tree | c3822bea15eedadb61d8192ce8a78763f1d921f1 | |
| parent | bb96093ca620a43bf56c0064196556c7aad6685f (diff) | |
| download | llvm-ba87fba80632b2149de0276d3fd41601f8dcf5e0.zip llvm-ba87fba80632b2149de0276d3fd41601f8dcf5e0.tar.gz llvm-ba87fba80632b2149de0276d3fd41601f8dcf5e0.tar.bz2 | |
[Attributor] Ignore different kernels for kernel lifetime objects
If a potentially interfering access is in a different kernel and the
underlying object has kernel lifetime, we can ignore the interfering
access outright.
TODO: This should be made much stronger via "reaching kernels", which we
already track in AAKernelInfo.
| -rw-r--r-- | llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 21 | ||||
| -rw-r--r-- | llvm/test/Transforms/Attributor/value-simplify-gpu.ll | 6 |
2 files changed, 20 insertions, 7 deletions
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 3808eef..e062acd 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -1200,6 +1200,11 @@ struct AAPointerInfoImpl A, this, IRPosition::function(Scope), DepClassTy::OPTIONAL, IsKnownNoRecurse); + // TODO: Use reaching kernels from AAKernelInfo (or move it to + // AAExecutionDomain) such that we allow scopes other than kernels as long + // as the reaching kernels are disjoint. + bool InstInKernel = Scope.hasFnAttribute("kernel"); + bool ObjHasKernelLifetime = false; const bool UseDominanceReasoning = FindInterferingWrites && IsKnownNoRecurse; const DominatorTree *DT = @@ -1232,6 +1237,7 @@ struct AAPointerInfoImpl // If the alloca containing function is not recursive the alloca // must be dead in the callee. const Function *AIFn = AI->getFunction(); + ObjHasKernelLifetime = AIFn->hasFnAttribute("kernel"); bool IsKnownNoRecurse; if (AA::hasAssumedIRAttr<Attribute::NoRecurse>( A, this, IRPosition::function(*AIFn), DepClassTy::OPTIONAL, @@ -1241,7 +1247,8 @@ struct AAPointerInfoImpl } else if (auto *GV = dyn_cast<GlobalValue>(&getAssociatedValue())) { // If the global has kernel lifetime we can stop if we reach a kernel // as it is "dead" in the (unknown) callees. - if (HasKernelLifetime(GV, *GV->getParent())) + ObjHasKernelLifetime = HasKernelLifetime(GV, *GV->getParent()); + if (ObjHasKernelLifetime) IsLiveInCalleeCB = [](const Function &Fn) { return !Fn.hasFnAttribute("kernel"); }; @@ -1252,6 +1259,15 @@ struct AAPointerInfoImpl AA::InstExclusionSetTy ExclusionSet; auto AccessCB = [&](const Access &Acc, bool Exact) { + Function *AccScope = Acc.getRemoteInst()->getFunction(); + bool AccInSameScope = AccScope == &Scope; + + // If the object has kernel lifetime we can ignore accesses only reachable + // by other kernels. For now we only skip accesses *in* other kernels. 
+ if (InstInKernel && ObjHasKernelLifetime && !AccInSameScope && + AccScope->hasFnAttribute("kernel")) + return true; + if (Exact && Acc.isMustAccess() && Acc.getRemoteInst() != &I) { if (Acc.isWrite() || (isa<LoadInst>(I) && Acc.isWriteOrAssumption())) ExclusionSet.insert(Acc.getRemoteInst()); @@ -1262,8 +1278,7 @@ struct AAPointerInfoImpl return true; bool Dominates = FindInterferingWrites && DT && Exact && - Acc.isMustAccess() && - (Acc.getRemoteInst()->getFunction() == &Scope) && + Acc.isMustAccess() && AccInSameScope && DT->dominates(Acc.getRemoteInst(), &I); if (Dominates) DominatingWrites.insert(&Acc); diff --git a/llvm/test/Transforms/Attributor/value-simplify-gpu.ll b/llvm/test/Transforms/Attributor/value-simplify-gpu.ll index b57cf75..a7e0a92 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-gpu.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-gpu.ll @@ -401,8 +401,7 @@ define dso_local void @kernel3(i32 %C) norecurse "kernel" { ; TUNIT-NEXT: [[I:%.*]] = icmp eq i32 [[C]], 42 ; TUNIT-NEXT: br i1 [[I]], label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: -; TUNIT-NEXT: [[L:%.*]] = load i32, ptr addrspace(3) @AS3OneKernelAtATime, align 4 -; TUNIT-NEXT: call void @use(i32 noundef [[L]], i32 noundef [[L]], i32 noundef [[L]]) #[[ATTR7]] +; TUNIT-NEXT: call void @use(i32 noundef 42, i32 noundef 42, i32 noundef 42) #[[ATTR7]] ; TUNIT-NEXT: ret void ; TUNIT: f: ; TUNIT-NEXT: ret void @@ -413,8 +412,7 @@ define dso_local void @kernel3(i32 %C) norecurse "kernel" { ; CGSCC-NEXT: [[I:%.*]] = icmp eq i32 [[C]], 42 ; CGSCC-NEXT: br i1 [[I]], label [[T:%.*]], label [[F:%.*]] ; CGSCC: t: -; CGSCC-NEXT: [[L:%.*]] = load i32, ptr addrspace(3) @AS3OneKernelAtATime, align 4 -; CGSCC-NEXT: call void @use(i32 noundef [[L]], i32 noundef [[L]], i32 noundef [[L]]) #[[ATTR4]] +; CGSCC-NEXT: call void @use(i32 noundef 42, i32 noundef 42, i32 noundef 42) #[[ATTR4]] ; CGSCC-NEXT: ret void ; CGSCC: f: ; CGSCC-NEXT: ret void |
