aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/CodeGen
diff options
context:
space:
mode:
authorJoseph Huber <jhuber6@vols.utk.edu>2023-11-10 14:53:53 -0600
committerGitHub <noreply@github.com>2023-11-10 14:53:53 -0600
commit237adfca4ef8d94b7d3d8817da615c5e5897c3da (patch)
treefc071a9dc1137dbb2caadef6157bdf3a221024b3 /clang/lib/CodeGen
parent133bcacecfb70e8b1692f9c2c0a44ec640a0422a (diff)
downloadllvm-237adfca4ef8d94b7d3d8817da615c5e5897c3da.zip
llvm-237adfca4ef8d94b7d3d8817da615c5e5897c3da.tar.gz
llvm-237adfca4ef8d94b7d3d8817da615c5e5897c3da.tar.bz2
[OpenMP] Rework handling of global ctor/dtors in OpenMP (#71739)
Summary: This patch reworks how we handle global constructors in OpenMP. Previously, we emitted individual kernels that were all registered and called individually. In order to provide more generic support, this patch moves all handling of this to the target backend and the runtime plugin. This has the benefit of supporting the GNU extensions for constructors an destructors, removing a class of failures related to shared library destruction order, and allows targets other than OpenMP to use the same support without needing to change the frontend. This is primarily done by calling kernels that the backend emits to iterate a list of ctor / dtor functions. For x64, this is automatic and we get it for free with the standard `dlopen` handling. For AMDGPU, we emit `amdgcn.device.init` and `amdgcn.device.fini` functions which handle everything atuomatically and simply need to be called. For NVPTX, a patch https://github.com/llvm/llvm-project/pull/71549 provides the kernels to call, but the runtime needs to set up the array manually by pulling out all the known constructor / destructor functions. One concession that this patch requires is the change that for GPU targets in OpenMP offloading we will use `llvm.global_dtors` instead of using `atexit`. This is because `atexit` is a separate runtime function that does not mesh well with the handling we're trying to do here. This should be equivalent in all cases except for cases where we would need to destruct manually such as: ``` struct S { ~S() { foo(); } }; void foo() { static S s; } ``` However this is broken in many other ways on the GPU, so it is not regressing any support, simply increasing the scope of what we can handle. This changes the handling of ctors / dtors. This patch now outputs a information message regarding the deprecation if the old format is used. This will be completely removed in a later release. Depends on: https://github.com/llvm/llvm-project/pull/71549
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r--clang/lib/CodeGen/CGDeclCXX.cpp13
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.cpp130
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.h8
-rw-r--r--clang/lib/CodeGen/CodeGenFunction.h5
-rw-r--r--clang/lib/CodeGen/CodeGenModule.h14
-rw-r--r--clang/lib/CodeGen/ItaniumCXXABI.cpp8
6 files changed, 29 insertions, 149 deletions
diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp
index 3fa28b3..e08a1e5 100644
--- a/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -327,6 +327,15 @@ void CodeGenFunction::registerGlobalDtorWithAtExit(const VarDecl &VD,
registerGlobalDtorWithAtExit(dtorStub);
}
+/// Register a global destructor using the LLVM 'llvm.global_dtors' global.
+void CodeGenFunction::registerGlobalDtorWithLLVM(const VarDecl &VD,
+ llvm::FunctionCallee Dtor,
+ llvm::Constant *Addr) {
+ // Create a function which calls the destructor.
+ llvm::Function *dtorStub = createAtExitStub(VD, Dtor, Addr);
+ CGM.AddGlobalDtor(dtorStub);
+}
+
void CodeGenFunction::registerGlobalDtorWithAtExit(llvm::Constant *dtorStub) {
// extern "C" int atexit(void (*f)(void));
assert(dtorStub->getType() ==
@@ -519,10 +528,6 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D,
D->hasAttr<CUDASharedAttr>()))
return;
- if (getLangOpts().OpenMP &&
- getOpenMPRuntime().emitDeclareTargetVarDefinition(D, Addr, PerformInit))
- return;
-
// Check if we've already initialized this decl.
auto I = DelayedCXXInitPosition.find(D);
if (I != DelayedCXXInitPosition.end() && I->second == ~0U)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index a8e1150..d2be814 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1747,136 +1747,6 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
return nullptr;
}
-bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
- llvm::GlobalVariable *Addr,
- bool PerformInit) {
- if (CGM.getLangOpts().OMPTargetTriples.empty() &&
- !CGM.getLangOpts().OpenMPIsTargetDevice)
- return false;
- std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
- OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
- if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
- ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
- *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
- HasRequiresUnifiedSharedMemory))
- return CGM.getLangOpts().OpenMPIsTargetDevice;
- VD = VD->getDefinition(CGM.getContext());
- assert(VD && "Unknown VarDecl");
-
- if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
- return CGM.getLangOpts().OpenMPIsTargetDevice;
-
- QualType ASTTy = VD->getType();
- SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
-
- // Produce the unique prefix to identify the new target regions. We use
- // the source location of the variable declaration which we know to not
- // conflict with any target region.
- llvm::TargetRegionEntryInfo EntryInfo =
- getEntryInfoFromPresumedLoc(CGM, OMPBuilder, Loc, VD->getName());
- SmallString<128> Buffer, Out;
- OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo);
-
- const Expr *Init = VD->getAnyInitializer();
- if (CGM.getLangOpts().CPlusPlus && PerformInit) {
- llvm::Constant *Ctor;
- llvm::Constant *ID;
- if (CGM.getLangOpts().OpenMPIsTargetDevice) {
- // Generate function that re-emits the declaration's initializer into
- // the threadprivate copy of the variable VD
- CodeGenFunction CtorCGF(CGM);
-
- const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
- llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
- llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
- FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
- llvm::GlobalValue::WeakODRLinkage);
- Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
- if (CGM.getTriple().isAMDGCN())
- Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
- auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
- CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
- FunctionArgList(), Loc, Loc);
- auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
- llvm::Constant *AddrInAS0 = Addr;
- if (Addr->getAddressSpace() != 0)
- AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
- Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0));
- CtorCGF.EmitAnyExprToMem(Init,
- Address(AddrInAS0, Addr->getValueType(),
- CGM.getContext().getDeclAlign(VD)),
- Init->getType().getQualifiers(),
- /*IsInitializer=*/true);
- CtorCGF.FinishFunction();
- Ctor = Fn;
- ID = Fn;
- } else {
- Ctor = new llvm::GlobalVariable(
- CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
- llvm::GlobalValue::PrivateLinkage,
- llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
- ID = Ctor;
- }
-
- // Register the information for the entry associated with the constructor.
- Out.clear();
- auto CtorEntryInfo = EntryInfo;
- CtorEntryInfo.ParentName = Twine(Buffer, "_ctor").toStringRef(Out);
- OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo(
- CtorEntryInfo, Ctor, ID,
- llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor);
- }
- if (VD->getType().isDestructedType() != QualType::DK_none) {
- llvm::Constant *Dtor;
- llvm::Constant *ID;
- if (CGM.getLangOpts().OpenMPIsTargetDevice) {
- // Generate function that emits destructor call for the threadprivate
- // copy of the variable VD
- CodeGenFunction DtorCGF(CGM);
-
- const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
- llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
- llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
- FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
- llvm::GlobalValue::WeakODRLinkage);
- Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
- if (CGM.getTriple().isAMDGCN())
- Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
- auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
- DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
- FunctionArgList(), Loc, Loc);
- // Create a scope with an artificial location for the body of this
- // function.
- auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
- llvm::Constant *AddrInAS0 = Addr;
- if (Addr->getAddressSpace() != 0)
- AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
- Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0));
- DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
- CGM.getContext().getDeclAlign(VD)),
- ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
- DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
- DtorCGF.FinishFunction();
- Dtor = Fn;
- ID = Fn;
- } else {
- Dtor = new llvm::GlobalVariable(
- CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
- llvm::GlobalValue::PrivateLinkage,
- llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
- ID = Dtor;
- }
- // Register the information for the entry associated with the destructor.
- Out.clear();
- auto DtorEntryInfo = EntryInfo;
- DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out);
- OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo(
- DtorEntryInfo, Dtor, ID,
- llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor);
- }
- return CGM.getLangOpts().OpenMPIsTargetDevice;
-}
-
void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
llvm::GlobalValue *GV) {
std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 0c4ad46..b01b39a 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -1089,14 +1089,6 @@ public:
SourceLocation Loc, bool PerformInit,
CodeGenFunction *CGF = nullptr);
- /// Emit a code for initialization of declare target variable.
- /// \param VD Declare target variable.
- /// \param Addr Address of the global variable \a VD.
- /// \param PerformInit true if initialization expression is not constant.
- virtual bool emitDeclareTargetVarDefinition(const VarDecl *VD,
- llvm::GlobalVariable *Addr,
- bool PerformInit);
-
/// Emit code for handling declare target functions in the runtime.
/// \param FD Declare target function.
/// \param Addr Address of the global \a FD.
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index bb8c144..b4c634d 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4536,6 +4536,11 @@ public:
void registerGlobalDtorWithAtExit(const VarDecl &D, llvm::FunctionCallee fn,
llvm::Constant *addr);
+ /// Registers the dtor using 'llvm.global_dtors' for platforms that do not
+ /// support an 'atexit()' function.
+ void registerGlobalDtorWithLLVM(const VarDecl &D, llvm::FunctionCallee fn,
+ llvm::Constant *addr);
+
/// Call atexit() with function dtorStub.
void registerGlobalDtorWithAtExit(llvm::Constant *dtorStub);
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index 793861f..e81edc9 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -1570,6 +1570,13 @@ public:
const VarDecl *D,
ForDefinition_t IsForDefinition = NotForDefinition);
+ // FIXME: Hardcoding priority here is gross.
+ void AddGlobalCtor(llvm::Function *Ctor, int Priority = 65535,
+ unsigned LexOrder = ~0U,
+ llvm::Constant *AssociatedData = nullptr);
+ void AddGlobalDtor(llvm::Function *Dtor, int Priority = 65535,
+ bool IsDtorAttrFunc = false);
+
private:
llvm::Constant *GetOrCreateLLVMFunction(
StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable,
@@ -1641,13 +1648,6 @@ private:
void EmitPointerToInitFunc(const VarDecl *VD, llvm::GlobalVariable *Addr,
llvm::Function *InitFunc, InitSegAttr *ISA);
- // FIXME: Hardcoding priority here is gross.
- void AddGlobalCtor(llvm::Function *Ctor, int Priority = 65535,
- unsigned LexOrder = ~0U,
- llvm::Constant *AssociatedData = nullptr);
- void AddGlobalDtor(llvm::Function *Dtor, int Priority = 65535,
- bool IsDtorAttrFunc = false);
-
/// EmitCtorList - Generates a global array of functions and priorities using
/// the given list and name. This array will have appending linkage and is
/// suitable for use as a LLVM constructor or destructor array. Clears Fns.
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index 89a2127..7398f49 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -2794,6 +2794,14 @@ void ItaniumCXXABI::registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D,
if (D.isNoDestroy(CGM.getContext()))
return;
+ // OpenMP offloading supports C++ constructors and destructors but we do not
+ // always have 'atexit' available. Instead lower these to use the LLVM global
+ // destructors which we can handle directly in the runtime. Note that this is
+ // not strictly 1-to-1 with using `atexit` because we no longer tear down
+ // globals in reverse order of when they were constructed.
+ if (!CGM.getLangOpts().hasAtExit() && !D.isStaticLocal())
+ return CGF.registerGlobalDtorWithLLVM(D, dtor, addr);
+
// emitGlobalDtorWithCXAAtExit will emit a call to either __cxa_thread_atexit
// or __cxa_atexit depending on whether this VarDecl is a thread-local storage
// or not. CXAAtExit controls only __cxa_atexit, so use it if it is enabled.