diff options
author | Andy Kaylor <akaylor@nvidia.com> | 2025-05-21 11:32:50 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-05-21 11:32:50 -0700 |
commit | 3ce74c3b4026473a0e3855598bf6dc98aac081a9 (patch) | |
tree | 7c9f394c8486178b8fbd40306f80fe14903f674e /clang/lib/CIR/CodeGen/CIRGenModule.cpp | |
parent | e4e7a7e64e75e0f39e94e9bac77d6def34ed142b (diff) | |
download | llvm-3ce74c3b4026473a0e3855598bf6dc98aac081a9.zip llvm-3ce74c3b4026473a0e3855598bf6dc98aac081a9.tar.gz llvm-3ce74c3b4026473a0e3855598bf6dc98aac081a9.tar.bz2 |
[CIR] Upstream support for string literals (#140796)
This adds the minimal support needed to handle string literals.
Diffstat (limited to 'clang/lib/CIR/CodeGen/CIRGenModule.cpp')
-rw-r--r-- | clang/lib/CIR/CodeGen/CIRGenModule.cpp | 102 |
1 files changed, 102 insertions, 0 deletions
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index e170498..fba8c07 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -562,6 +562,30 @@ void CIRGenModule::emitGlobalDefinition(clang::GlobalDecl gd, llvm_unreachable("Invalid argument to CIRGenModule::emitGlobalDefinition"); } +mlir::Attribute +CIRGenModule::getConstantArrayFromStringLiteral(const StringLiteral *e) { + assert(!e->getType()->isPointerType() && "Strings are always arrays"); + + // Don't emit it as the address of the string, emit the string data itself + // as an inline array. + if (e->getCharByteWidth() == 1) { + SmallString<64> str(e->getString()); + + // Resize the string to the right size, which is indicated by its type. + const ConstantArrayType *cat = + astContext.getAsConstantArrayType(e->getType()); + uint64_t finalSize = cat->getZExtSize(); + str.resize(finalSize); + + mlir::Type eltTy = convertType(cat->getElementType()); + return builder.getString(str, eltTy, finalSize); + } + + errorNYI(e->getSourceRange(), + "getConstantArrayFromStringLiteral: wide characters"); + return mlir::Attribute(); +} + static bool shouldBeInCOMDAT(CIRGenModule &cgm, const Decl &d) { assert(!cir::MissingFeatures::supportComdat()); @@ -749,6 +773,84 @@ CIRGenModule::getCIRLinkageVarDefinition(const VarDecl *vd, bool isConstant) { return getCIRLinkageForDeclarator(vd, linkage, isConstant); } +static cir::GlobalOp generateStringLiteral(mlir::Location loc, + mlir::TypedAttr c, CIRGenModule &cgm, + StringRef globalName) { + assert(!cir::MissingFeatures::addressSpace()); + + // Create a global variable for this string + // FIXME(cir): check for insertion point in module level. + cir::GlobalOp gv = + CIRGenModule::createGlobalOp(cgm, loc, globalName, c.getType()); + + // Set up extra information and add to the module + assert(!cir::MissingFeatures::opGlobalAlignment()); + assert(!cir::MissingFeatures::opGlobalLinkage()); + assert(!cir::MissingFeatures::opGlobalThreadLocal()); + assert(!cir::MissingFeatures::opGlobalUnnamedAddr()); + CIRGenModule::setInitializer(gv, c); + assert(!cir::MissingFeatures::supportComdat()); + assert(!cir::MissingFeatures::opGlobalDSOLocal()); + return gv; +} + +// LLVM IR automatically uniques names when new llvm::GlobalVariables are +// created. This is handy, for example, when creating globals for string +// literals. Since we don't do that when creating cir::GlobalOp's, we need +// a mechanism to generate a unique name in advance. +// +// For now, this mechanism is only used in cases where we know that the +// name is compiler-generated, so we don't use the MLIR symbol table for +// the lookup. +std::string CIRGenModule::getUniqueGlobalName(const std::string &baseName) { + // If this is the first time we've generated a name for this basename, use + // it as is and start a counter for this base name. + auto it = cgGlobalNames.find(baseName); + if (it == cgGlobalNames.end()) { + cgGlobalNames[baseName] = 1; + return baseName; + } + + std::string result = + baseName + "." + std::to_string(cgGlobalNames[baseName]++); + // There should not be any symbol with this name in the module. + assert(!mlir::SymbolTable::lookupSymbolIn(theModule, result)); + return result; +} + +/// Return a pointer to a constant array for the given string literal. +cir::GlobalOp CIRGenModule::getGlobalForStringLiteral(const StringLiteral *s, + StringRef name) { + mlir::Attribute c = getConstantArrayFromStringLiteral(s); + + if (getLangOpts().WritableStrings) { + errorNYI(s->getSourceRange(), + "getGlobalForStringLiteral: Writable strings"); + } + + // Mangle the string literal if that's how the ABI merges duplicate strings. + // Don't do it if they are writable, since we don't want writes in one TU to + // affect strings in another. + if (getCXXABI().getMangleContext().shouldMangleStringLiteral(s) && + !getLangOpts().WritableStrings) { + errorNYI(s->getSourceRange(), + "getGlobalForStringLiteral: mangle string literals"); + } + + // Unlike LLVM IR, CIR doesn't automatically unique names for globals, so + // we need to do that explicitly. + std::string uniqueName = getUniqueGlobalName(name.str()); + mlir::Location loc = getLoc(s->getSourceRange()); + auto typedC = llvm::cast<mlir::TypedAttr>(c); + assert(!cir::MissingFeatures::opGlobalAlignment()); + cir::GlobalOp gv = generateStringLiteral(loc, typedC, *this, uniqueName); + assert(!cir::MissingFeatures::opGlobalDSOLocal()); + + assert(!cir::MissingFeatures::sanitizers()); + + return gv; +} + void CIRGenModule::emitDeclContext(const DeclContext *dc) { for (Decl *decl : dc->decls()) { // Unlike other DeclContexts, the contents of an ObjCImplDecl at TU scope |