aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/CIR/CodeGen/CIRGenModule.cpp
diff options
context:
space:
mode:
authorAndy Kaylor <akaylor@nvidia.com>2025-05-21 11:32:50 -0700
committerGitHub <noreply@github.com>2025-05-21 11:32:50 -0700
commit3ce74c3b4026473a0e3855598bf6dc98aac081a9 (patch)
tree7c9f394c8486178b8fbd40306f80fe14903f674e /clang/lib/CIR/CodeGen/CIRGenModule.cpp
parente4e7a7e64e75e0f39e94e9bac77d6def34ed142b (diff)
downloadllvm-3ce74c3b4026473a0e3855598bf6dc98aac081a9.zip
llvm-3ce74c3b4026473a0e3855598bf6dc98aac081a9.tar.gz
llvm-3ce74c3b4026473a0e3855598bf6dc98aac081a9.tar.bz2
[CIR] Upstream support for string literals (#140796)
This adds the minimal support needed to handle string literals.
Diffstat (limited to 'clang/lib/CIR/CodeGen/CIRGenModule.cpp')
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenModule.cpp102
1 files changed, 102 insertions, 0 deletions
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index e170498..fba8c07 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -562,6 +562,30 @@ void CIRGenModule::emitGlobalDefinition(clang::GlobalDecl gd,
llvm_unreachable("Invalid argument to CIRGenModule::emitGlobalDefinition");
}
+mlir::Attribute
+CIRGenModule::getConstantArrayFromStringLiteral(const StringLiteral *e) {
+ assert(!e->getType()->isPointerType() && "Strings are always arrays");
+
+ // Don't emit it as the address of the string, emit the string data itself
+ // as an inline array.
+ if (e->getCharByteWidth() == 1) {
+ SmallString<64> str(e->getString());
+
+ // Resize the string to the right size, which is indicated by its type.
+ const ConstantArrayType *cat =
+ astContext.getAsConstantArrayType(e->getType());
+ uint64_t finalSize = cat->getZExtSize();
+ str.resize(finalSize);
+
+ mlir::Type eltTy = convertType(cat->getElementType());
+ return builder.getString(str, eltTy, finalSize);
+ }
+
+ errorNYI(e->getSourceRange(),
+ "getConstantArrayFromStringLiteral: wide characters");
+ return mlir::Attribute();
+}
+
static bool shouldBeInCOMDAT(CIRGenModule &cgm, const Decl &d) {
assert(!cir::MissingFeatures::supportComdat());
@@ -749,6 +773,84 @@ CIRGenModule::getCIRLinkageVarDefinition(const VarDecl *vd, bool isConstant) {
return getCIRLinkageForDeclarator(vd, linkage, isConstant);
}
+static cir::GlobalOp generateStringLiteral(mlir::Location loc,
+ mlir::TypedAttr c, CIRGenModule &cgm,
+ StringRef globalName) {
+ assert(!cir::MissingFeatures::addressSpace());
+
+ // Create a global variable for this string
+ // FIXME(cir): check for insertion point in module level.
+ cir::GlobalOp gv =
+ CIRGenModule::createGlobalOp(cgm, loc, globalName, c.getType());
+
+ // Set up extra information and add to the module
+ assert(!cir::MissingFeatures::opGlobalAlignment());
+ assert(!cir::MissingFeatures::opGlobalLinkage());
+ assert(!cir::MissingFeatures::opGlobalThreadLocal());
+ assert(!cir::MissingFeatures::opGlobalUnnamedAddr());
+ CIRGenModule::setInitializer(gv, c);
+ assert(!cir::MissingFeatures::supportComdat());
+ assert(!cir::MissingFeatures::opGlobalDSOLocal());
+ return gv;
+}
+
+// LLVM IR automatically uniques names when new llvm::GlobalVariables are
+// created. This is handy, for example, when creating globals for string
+// literals. Since we don't do that when creating cir::GlobalOp's, we need
+// a mechanism to generate a unique name in advance.
+//
+// For now, this mechanism is only used in cases where we know that the
+// name is compiler-generated, so we don't use the MLIR symbol table for
+// the lookup.
+std::string CIRGenModule::getUniqueGlobalName(const std::string &baseName) {
+ // If this is the first time we've generated a name for this basename, use
+ // it as is and start a counter for this base name.
+ auto it = cgGlobalNames.find(baseName);
+ if (it == cgGlobalNames.end()) {
+ cgGlobalNames[baseName] = 1;
+ return baseName;
+ }
+
+ std::string result =
+ baseName + "." + std::to_string(cgGlobalNames[baseName]++);
+ // There should not be any symbol with this name in the module.
+ assert(!mlir::SymbolTable::lookupSymbolIn(theModule, result));
+ return result;
+}
+
+/// Return a pointer to a constant array for the given string literal.
+cir::GlobalOp CIRGenModule::getGlobalForStringLiteral(const StringLiteral *s,
+ StringRef name) {
+ mlir::Attribute c = getConstantArrayFromStringLiteral(s);
+
+ if (getLangOpts().WritableStrings) {
+ errorNYI(s->getSourceRange(),
+ "getGlobalForStringLiteral: Writable strings");
+ }
+
+ // Mangle the string literal if that's how the ABI merges duplicate strings.
+ // Don't do it if they are writable, since we don't want writes in one TU to
+ // affect strings in another.
+ if (getCXXABI().getMangleContext().shouldMangleStringLiteral(s) &&
+ !getLangOpts().WritableStrings) {
+ errorNYI(s->getSourceRange(),
+ "getGlobalForStringLiteral: mangle string literals");
+ }
+
+ // Unlike LLVM IR, CIR doesn't automatically unique names for globals, so
+ // we need to do that explicitly.
+ std::string uniqueName = getUniqueGlobalName(name.str());
+ mlir::Location loc = getLoc(s->getSourceRange());
+ auto typedC = llvm::cast<mlir::TypedAttr>(c);
+ assert(!cir::MissingFeatures::opGlobalAlignment());
+ cir::GlobalOp gv = generateStringLiteral(loc, typedC, *this, uniqueName);
+ assert(!cir::MissingFeatures::opGlobalDSOLocal());
+
+ assert(!cir::MissingFeatures::sanitizers());
+
+ return gv;
+}
+
void CIRGenModule::emitDeclContext(const DeclContext *dc) {
for (Decl *decl : dc->decls()) {
// Unlike other DeclContexts, the contents of an ObjCImplDecl at TU scope