aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Johannes Doerfert <johannes@jdoerfert.de> 2023-10-21 12:05:59 -0700
committer Johannes Doerfert <johannes@jdoerfert.de> 2023-10-21 12:31:06 -0700
commit ba87fba80632b2149de0276d3fd41601f8dcf5e0 (patch)
tree c3822bea15eedadb61d8192ce8a78763f1d921f1
parent bb96093ca620a43bf56c0064196556c7aad6685f (diff)
download llvm-ba87fba80632b2149de0276d3fd41601f8dcf5e0.zip
llvm-ba87fba80632b2149de0276d3fd41601f8dcf5e0.tar.gz
llvm-ba87fba80632b2149de0276d3fd41601f8dcf5e0.tar.bz2
[Attributor] Ignore different kernels for kernel lifetime objects
If a potentially interfering access is in a different kernel and the underlying object has kernel lifetime, we can ignore the interfering access outright. TODO: This should be made much stronger via "reaching kernels", which we already track in AAKernelInfo.
-rw-r--r-- llvm/lib/Transforms/IPO/AttributorAttributes.cpp 21
-rw-r--r-- llvm/test/Transforms/Attributor/value-simplify-gpu.ll 6
2 files changed, 20 insertions, 7 deletions
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 3808eef..e062acd 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -1200,6 +1200,11 @@ struct AAPointerInfoImpl
A, this, IRPosition::function(Scope), DepClassTy::OPTIONAL,
IsKnownNoRecurse);
+ // TODO: Use reaching kernels from AAKernelInfo (or move it to
+ // AAExecutionDomain) such that we allow scopes other than kernels as long
+ // as the reaching kernels are disjoint.
+ bool InstInKernel = Scope.hasFnAttribute("kernel");
+ bool ObjHasKernelLifetime = false;
const bool UseDominanceReasoning =
FindInterferingWrites && IsKnownNoRecurse;
const DominatorTree *DT =
@@ -1232,6 +1237,7 @@ struct AAPointerInfoImpl
// If the alloca containing function is not recursive the alloca
// must be dead in the callee.
const Function *AIFn = AI->getFunction();
+ ObjHasKernelLifetime = AIFn->hasFnAttribute("kernel");
bool IsKnownNoRecurse;
if (AA::hasAssumedIRAttr<Attribute::NoRecurse>(
A, this, IRPosition::function(*AIFn), DepClassTy::OPTIONAL,
@@ -1241,7 +1247,8 @@ struct AAPointerInfoImpl
} else if (auto *GV = dyn_cast<GlobalValue>(&getAssociatedValue())) {
// If the global has kernel lifetime we can stop if we reach a kernel
// as it is "dead" in the (unknown) callees.
- if (HasKernelLifetime(GV, *GV->getParent()))
+ ObjHasKernelLifetime = HasKernelLifetime(GV, *GV->getParent());
+ if (ObjHasKernelLifetime)
IsLiveInCalleeCB = [](const Function &Fn) {
return !Fn.hasFnAttribute("kernel");
};
@@ -1252,6 +1259,15 @@ struct AAPointerInfoImpl
AA::InstExclusionSetTy ExclusionSet;
auto AccessCB = [&](const Access &Acc, bool Exact) {
+ Function *AccScope = Acc.getRemoteInst()->getFunction();
+ bool AccInSameScope = AccScope == &Scope;
+
+ // If the object has kernel lifetime we can ignore accesses only reachable
+ // by other kernels. For now we only skip accesses *in* other kernels.
+ if (InstInKernel && ObjHasKernelLifetime && !AccInSameScope &&
+ AccScope->hasFnAttribute("kernel"))
+ return true;
+
if (Exact && Acc.isMustAccess() && Acc.getRemoteInst() != &I) {
if (Acc.isWrite() || (isa<LoadInst>(I) && Acc.isWriteOrAssumption()))
ExclusionSet.insert(Acc.getRemoteInst());
@@ -1262,8 +1278,7 @@ struct AAPointerInfoImpl
return true;
bool Dominates = FindInterferingWrites && DT && Exact &&
- Acc.isMustAccess() &&
- (Acc.getRemoteInst()->getFunction() == &Scope) &&
+ Acc.isMustAccess() && AccInSameScope &&
DT->dominates(Acc.getRemoteInst(), &I);
if (Dominates)
DominatingWrites.insert(&Acc);
diff --git a/llvm/test/Transforms/Attributor/value-simplify-gpu.ll b/llvm/test/Transforms/Attributor/value-simplify-gpu.ll
index b57cf75..a7e0a92 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-gpu.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-gpu.ll
@@ -401,8 +401,7 @@ define dso_local void @kernel3(i32 %C) norecurse "kernel" {
; TUNIT-NEXT: [[I:%.*]] = icmp eq i32 [[C]], 42
; TUNIT-NEXT: br i1 [[I]], label [[T:%.*]], label [[F:%.*]]
; TUNIT: t:
-; TUNIT-NEXT: [[L:%.*]] = load i32, ptr addrspace(3) @AS3OneKernelAtATime, align 4
-; TUNIT-NEXT: call void @use(i32 noundef [[L]], i32 noundef [[L]], i32 noundef [[L]]) #[[ATTR7]]
+; TUNIT-NEXT: call void @use(i32 noundef 42, i32 noundef 42, i32 noundef 42) #[[ATTR7]]
; TUNIT-NEXT: ret void
; TUNIT: f:
; TUNIT-NEXT: ret void
@@ -413,8 +412,7 @@ define dso_local void @kernel3(i32 %C) norecurse "kernel" {
; CGSCC-NEXT: [[I:%.*]] = icmp eq i32 [[C]], 42
; CGSCC-NEXT: br i1 [[I]], label [[T:%.*]], label [[F:%.*]]
; CGSCC: t:
-; CGSCC-NEXT: [[L:%.*]] = load i32, ptr addrspace(3) @AS3OneKernelAtATime, align 4
-; CGSCC-NEXT: call void @use(i32 noundef [[L]], i32 noundef [[L]], i32 noundef [[L]]) #[[ATTR4]]
+; CGSCC-NEXT: call void @use(i32 noundef 42, i32 noundef 42, i32 noundef 42) #[[ATTR4]]
; CGSCC-NEXT: ret void
; CGSCC: f:
; CGSCC-NEXT: ret void