diff options
| author | Tony Varghese <tony.varghese@ibm.com> | 2026-03-18 10:48:36 -0400 |
|---|---|---|
| committer | Tony Varghese <tony.varghese@ibm.com> | 2026-03-23 02:01:58 -0400 |
| commit | 9fa76d028cc835c1aa1ab55544a6e42d8523e8b3 (patch) | |
| tree | 84760183e8aa597efa1be1937a6ae734bb0b4dc2 | |
| parent | ed0f1ec37fa03d18518b59509bb77ea9693ae163 (diff) | |
| download | llvm-users/tonykuttai/pragma-comment-copyright-cli.tar.gz llvm-users/tonykuttai/pragma-comment-copyright-cli.tar.bz2 llvm-users/tonykuttai/pragma-comment-copyright-cli.zip | |
[Clang][AIX] Add -mloadtime-comment-vars flag to preserve identifying variables users/tonykuttai/pragma-comment-copyright-cli
| -rw-r--r-- | clang/include/clang/Basic/CodeGenOptions.h | 2 | ||||
| -rw-r--r-- | clang/include/clang/Options/Options.td | 7 | ||||
| -rw-r--r-- | clang/lib/CodeGen/CodeGenModule.cpp | 73 | ||||
| -rw-r--r-- | clang/lib/CodeGen/CodeGenModule.h | 6 | ||||
| -rw-r--r-- | clang/lib/Driver/ToolChains/Clang.cpp | 5 | ||||
| -rw-r--r-- | clang/test/CodeGen/loadtime-comment-vars.c | 28 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp | 120 | ||||
| -rw-r--r-- | llvm/test/Transforms/LowerCommentString/loadtime-comment-vars.ll | 26 |
8 files changed, 212 insertions, 55 deletions
diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index 9454f7672b7e..062a7a4dff73 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -323,6 +323,8 @@ public: /// A list of linker options to embed in the object file. std::vector<std::string> LinkerOptions; + std::vector<std::string> LoadTimeCommentVars; + /// Name of the profile file to use as output for -fprofile-instr-generate, /// -fprofile-generate, and -fcs-profile-generate. std::string InstrProfileOutput; diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index 8b0c70152172..92d86bc3d06f 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -4698,6 +4698,13 @@ def fvisibility_global_new_delete_EQ : Joined<["-"], "fvisibility-global-new-del Visibility<[ClangOption, CC1Option]>, HelpText<"The visibility for global C++ operator new and delete declarations. 
If 'source' is specified the visibility is not adjusted">, MarshallingInfoVisibilityGlobalNewDelete<LangOpts<"GlobalAllocationFunctionVisibility">, "ForceDefault">; +def mloadtime_comment_vars_EQ + : CommaJoined<["-"], "mloadtime-comment-vars=">, + Group<m_Group>, + Visibility<[ClangOption, CC1Option]>, + HelpText<"Comma-separated list of global variable names to treat as " + "loadtime variables">, + MarshallingInfoStringVector<CodeGenOpts<"LoadTimeCommentVars">>; def mdefault_visibility_export_mapping_EQ : Joined<["-"], "mdefault-visibility-export-mapping=">, Values<"none,explicit,all">, NormalizedValuesScope<"LangOptions::DefaultVisiblityExportMapping">, diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index eaa64b10e236..a2432a80e71a 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -69,6 +69,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Hash.h" #include "llvm/Support/TimeProfiler.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/AArch64TargetParser.h" #include "llvm/TargetParser/RISCVISAInfo.h" #include "llvm/TargetParser/Triple.h" @@ -1633,6 +1634,9 @@ void CodeGenModule::Release() { EmitBackendOptionsMetadata(getCodeGenOpts()); EmitLoadTimeComment(); + + // Handle CLI load-time string variables + EmitLoadTimeCommentVars(); // If there is device offloading code embed it in the host now. EmbedObject(&getModule(), CodeGenOpts, *getFileSystem(), getDiags()); @@ -4106,6 +4110,75 @@ void CodeGenModule::EmitLoadTimeComment() { } } +bool CodeGenModule::isValidLoadTimeCommentVariable(const VarDecl *D) const { + // Must be a valid declaration and must have an initializer (the string) + if (!D || !D->hasInit()) + return false; + + QualType Ty = D->getType(); + + // 1. 
Handle Pointers (e.g., char *sccsid, const char *copyright) + if (const PointerType *PT = Ty->getAs<PointerType>()) { + if (PT->getPointeeType()->isAnyCharacterType()) + return true; + } + + // 2. Handle Arrays (e.g., char version[]) + // We use ASTContext::getAsArrayType to safely unwrap constant arrays + if (const ArrayType *AT = getContext().getAsArrayType(Ty)) { + if (AT->getElementType()->isAnyCharacterType()) + return true; + } + + return false; // Reject ints, structs, etc. +} + +void CodeGenModule::EmitLoadTimeCommentVars() { + // Handle CLI loadtime comment variables + if (!getTriple().isOSAIX()) + return; + + const auto &LoadTimeCommentVars = getCodeGenOpts().LoadTimeCommentVars; + if (LoadTimeCommentVars.empty()) + return; + + TranslationUnitDecl *TU = getContext().getTranslationUnitDecl(); + // Iterate through ALL top-level declarations + for (auto *D : TU->decls()) { + if (VarDecl *VD = dyn_cast<VarDecl>(D)) { + + // Check if the variable name is in our parsed list + if (!llvm::is_contained(LoadTimeCommentVars, VD->getName())) + continue; + + if (!isValidLoadTimeCommentVariable(VD)) + continue; + + // Get or create the GlobalValue in the IR + llvm::Constant *Addr = GetAddrOfGlobalVar(VD); + + // Strip pointer casts safely + if (auto *GV = + dyn_cast<llvm::GlobalVariable>(Addr->stripPointerCasts())) { + + // Force Clang to emit the definition if it skipped it + if (GV->isDeclaration()) + EmitGlobalDefinition(VD); + + if (!GV->isDeclaration()) { + // Tag it for the backend and prevent GC + auto &C = getLLVMContext(); + llvm::Metadata *Ops[] = {llvm::MDString::get(C, VD->getName())}; + GV->setMetadata("copyright.variable", llvm::MDNode::get(C, Ops)); + + // Prevent Linker/Optimization GC + addUsedGlobal(GV); + } + } + } + } +} + bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) { // In OpenMP 5.0 variables and function may be marked as // device_type(host/nohost) and we should not emit them eagerly unless we sure diff --git 
a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index d859943ebfb7..60310da9529d 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -2086,6 +2086,12 @@ private: /// be processed by the backend to include it in the generated executable. void EmitLoadTimeComment(); + /// Helper method to check if a variable Decl is part of + /// LoadTimeCommentVars + bool isValidLoadTimeCommentVariable(const VarDecl *D) const; + + void EmitLoadTimeCommentVars(); + /// Determine whether the definition can be emitted eagerly, or should be /// delayed until the end of the translation unit. This is relevant for /// definitions whose linkage can change, e.g. implicit function instantions diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 6416baf9126f..0d6e495989ad 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6080,6 +6080,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, << A->getOption().getName() << TripleStr; } + // Forward loadtime-comment vars option to cc1 + if (Arg *A = Args.getLastArg(options::OPT_mloadtime_comment_vars_EQ)) { + A->render(Args, CmdArgs); + } + // Prepare `-aux-target-cpu` and `-aux-target-feature` unless // `--gpu-use-aux-triple-only` is specified. 
if (!Args.getLastArg(options::OPT_gpu_use_aux_triple_only) && diff --git a/clang/test/CodeGen/loadtime-comment-vars.c b/clang/test/CodeGen/loadtime-comment-vars.c new file mode 100644 index 000000000000..ef32ba494ed8 --- /dev/null +++ b/clang/test/CodeGen/loadtime-comment-vars.c @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -triple powerpc-ibm-aix -mloadtime-comment-vars=sccsid,version,build_number -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64-ibm-aix -mloadtime-comment-vars=sccsid,version,build_number -emit-llvm -o - %s | FileCheck %s + +// String pointer (Should be emitted and tagged) +static char *sccsid = "@(#) Object sid Version 1.0"; + +// String array (Should be emitted and tagged) +static char version[] = "Object scc Version 2.0"; + +// Const string (Not in CLI list, should NOT be emitted) +static const char *copyright = "Copyright 2026"; + +// Integer (In CLI list but invalid type, should NOT be emitted) +static int build_number = 12345; + +void foo() {} + +// CHECK: @sccsid = internal global ptr @.str, align {{[0-9]+}}, !copyright.variable ![[MD_SCC:[0-9]+]] +// CHECK-NEXT: @.str = private unnamed_addr constant [28 x i8] c"@(#) Object sid Version 1.0\00", align 1 +// CHECK: @version = internal global [23 x i8] c"Object scc Version 2.0\00", align {{[0-9]+}}, !copyright.variable ![[MD_VER:[0-9]+]] + +// Ensure the unrequested/invalid variables are optimized away +// CHECK-NOT: @copyright +// CHECK-NOT: @build_number + +// Ensure the metadata tags contain the correct strings +// CHECK: ![[MD_SCC]] = !{!"sccsid"} +// CHECK: ![[MD_VER]] = !{!"version"} diff --git a/llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp b/llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp index 6deef2f75e0a..3a20694a6728 100644 --- a/llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp +++ b/llvm/lib/Transforms/Utils/LowerCommentStringPass.cpp @@ -81,68 +81,78 @@ PreservedAnalyses LowerCommentStringPass::run(Module &M, LLVMContext &Ctx = M.getContext(); + // 
Collect all globals that need implicit refs, both string and variables + SmallVector<GlobalValue *, 4> CopyrightGlobals; + + // 1. Process pragma comment copyright (string literal) Once per TU // Single-metadata: !comment_string.loadtime = !{!0} // Each operand node is expected to have one MDString operand. NamedMDNode *MD = M.getNamedMetadata("comment_string.loadtime"); - if (!MD || MD->getNumOperands() == 0) - return PreservedAnalyses::all(); - - // At this point we are guarateed that one TU contains a single copyright - // metadata entry. Create TU-local string global for that metadata entry. - MDNode *MdNode = MD->getOperand(0); - if (!MdNode || MdNode->getNumOperands() == 0) - return PreservedAnalyses::all(); - - auto *MdString = dyn_cast_or_null<MDString>(MdNode->getOperand(0)); - if (!MdString) - return PreservedAnalyses::all(); - - StringRef Text = MdString->getString(); - if (Text.empty()) - return PreservedAnalyses::all(); - - // 1. Create a single NULL-terminated string global - Constant *StrInit = ConstantDataArray::getString(Ctx, Text, /*AddNull=*/true); - - // Internal, constant, TU-local--avoids duplicate symbol issues across TUs. - auto *StrGV = new GlobalVariable(M, StrInit->getType(), - /*isConstant=*/true, - GlobalValue::InternalLinkage, StrInit, - /*Name=*/"__loadtime_comment_str"); - // Set unnamed_addr to allow the linker to merge identical strings - StrGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); - StrGV->setAlignment(Align(1)); - // Place in the "__loadtime_comment" section. - // The GV is constant, so we expect a read-only section. - StrGV->setSection("__loadtime_comment"); - - // 2. Add the string to llvm.used to prevent LLVM optimization/LTO passes from - // removing it. - appendToUsed(M, {StrGV}); - - // 3. 
Attach !implicit ref to every defined function - // Create a metadata node pointing to the copyright string: - // !N = !{ptr @__loadtime_comment_str} - Metadata *Ops[] = {ConstantAsMetadata::get(StrGV)}; - MDNode *ImplicitRefMD = MDNode::get(Ctx, Ops); - - // Lambda to attach implicit.ref metadata to a function. - auto AddImplicitRef = [&](Function &F) { + if (MD && MD->getNumOperands() > 0) { + MDNode *MdNode = MD->getOperand(0); + if (MdNode && MdNode->getNumOperands() > 0) { + auto *MdString = dyn_cast_or_null<MDString>(MdNode->getOperand(0)); + if (MdString && !MdString->getString().empty()) { + StringRef Text = MdString->getString(); + + // Create the string global + Constant *StrInit = + ConstantDataArray::getString(Ctx, Text, /*AddNull*/ true); + auto *StrGV = new GlobalVariable(M, StrInit->getType(), + /*isConstant*/ true, + GlobalValue::InternalLinkage, StrInit, + "__loadtime_comment_str"); + StrGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + StrGV->setAlignment(Align(1)); + StrGV->setSection("__loadtime_comment"); + + // Add the string to llvm.used to prevent LLVM optimization/LTO passes + // from removing it + appendToUsed(M, {StrGV}); + + // Add to list of globals needing implicit refs + CopyrightGlobals.push_back(StrGV); + } + } + MD->eraseFromParent(); + } + + // 2. 
Process copyright variables - multiple allowed per TU + for (GlobalVariable &GV : M.globals()) { + if (GV.getMetadata("copyright.variable")) { + // Add to list of globals needing implicit refs + CopyrightGlobals.push_back(&GV); + } + } + + // Lambda to attach implicit ref metadata to a function + auto AddImplicitRef = [&](Function &F, GlobalValue *GV) { if (F.isDeclaration()) return; - // Attach the implicit.ref metadata to the function - F.setMetadata("implicit.ref", ImplicitRefMD); - LLVM_DEBUG(dbgs() << "[copyright] attached implicit.ref to function: " - << F.getName() << "\n"); - }; - // Process all functions in the module - for (Function &F : M) - AddImplicitRef(F); + // Create a new MDNode with exactly ONE operand (the global variable) + Metadata *Ops[] = {ConstantAsMetadata::get(GV)}; + MDNode *NewMD = MDNode::get(Ctx, Ops); + + // addMetadata allows multiple nodes of the same kind to be attached to a + // function. This correctly creates a list of single-operand MDNodes. + F.addMetadata(LLVMContext::MD_implicit_ref, *NewMD); - // Cleanup the processed metadata. - MD->eraseFromParent(); - LLVM_DEBUG(dbgs() << "[copyright] created string and anchor for module\n"); + LLVM_DEBUG(dbgs() << "[copyright] attached implicit.ref to function: " + << F.getName() << " for global: " << GV->getName() + << "\n"); + }; + // 3. 
Attach implicit ref to all functions for each copyright global + if (!CopyrightGlobals.empty()) { + // Apply to all functions for all copyright globals + for (GlobalValue *GV : CopyrightGlobals) { + for (Function &F : M) + AddImplicitRef(F, GV); + } + } + + LLVM_DEBUG(dbgs() << "[copyright] processed " << CopyrightGlobals.size() + << " copyright globals\n"); return PreservedAnalyses::all(); } diff --git a/llvm/test/Transforms/LowerCommentString/loadtime-comment-vars.ll b/llvm/test/Transforms/LowerCommentString/loadtime-comment-vars.ll new file mode 100644 index 000000000000..ff7c291d3aaf --- /dev/null +++ b/llvm/test/Transforms/LowerCommentString/loadtime-comment-vars.ll @@ -0,0 +1,26 @@ +; RUN: opt -passes=lower-comment-string -S < %s | FileCheck %s + +target triple = "powerpc64-ibm-aix" + +@sccsid = internal global ptr @.str, align 8, !copyright.variable !0 +@.str = private unnamed_addr constant [24 x i8] c"@(#) sccsid Version 1.0\00", align 1 +@version = internal global [22 x i8] c"Copyright Version 2.0\00", align 1, !copyright.variable !1 + +; CHECK: define void @foo() !implicit.ref ![[REF1:[0-9]+]] !implicit.ref ![[REF2:[0-9]+]] { +define void @foo() { +entry: + ret void +} + +; CHECK: define void @bar() !implicit.ref ![[REF1]] !implicit.ref ![[REF2]] { +define void @bar() { +entry: + ret void +} + +!0 = !{!"sccsid"} +!1 = !{!"version"} + +; Verify that the generated implicit.ref metadata nodes point to the correct global variables. +; CHECK: ![[REF1]] = !{ptr @sccsid} +; CHECK: ![[REF2]] = !{ptr @version}
\ No newline at end of file |
