[CIR] Upstream support for address of and dereference (#134317)

This adds support for the address-of (`&`) and dereference (`*`) unary operators in ClangIR code generation. It also adds handling for `nullptr` and proper initialization via the NullToPointer cast.
author: Andy Kaylor <akaylor@nvidia.com> 2025-04-08 10:32:03 -0700
committer: GitHub <noreply@github.com> 2025-04-08 10:32:03 -0700
commit: 4928093a21cea9bd76d1e47455e990874ad352df (patch)
tree: 84552dc868642a1b67ff52927ce8b3964cd75d05 /clang/lib/CIR/CodeGen
parent: 9bfb4b8fb194c1df5b082888abc03d095e39b6e9 (diff)
download: llvm-4928093a21cea9bd76d1e47455e990874ad352df.zip
llvm-4928093a21cea9bd76d1e47455e990874ad352df.tar.gz
llvm-4928093a21cea9bd76d1e47455e990874ad352df.tar.bz2
8 files changed, 274 insertions, 5 deletions
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index f01e03a..fe3f2f5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -25,6 +25,147 @@ using namespace clang;
 using namespace clang::CIRGen;
 using namespace cir;
 
+/// Emit a pointer-typed expression as an Address, trying to derive a more
+/// accurate bound on the pointer's alignment than its type alone implies.
+Address CIRGenFunction::emitPointerWithAlignment(const Expr *expr) {
+  // We allow this with ObjC object pointers because of fragile ABIs.
+  assert(expr->getType()->isPointerType() ||
+         expr->getType()->isObjCObjectPointerType());
+  expr = expr->IgnoreParens();
+
+  // Casts: explicit casts are diagnosed as NYI before the cast kind is read.
+  if (const auto *ce = dyn_cast<CastExpr>(expr)) {
+    if (isa<ExplicitCastExpr>(ce)) {
+      cgm.errorNYI(expr->getSourceRange(),
+                   "emitPointerWithAlignment: explicit cast");
+      return Address::invalid();
+    }
+
+    switch (ce->getCastKind()) {
+    // Non-converting casts (but not C's implicit conversion from void*).
+    case CK_BitCast:
+    case CK_NoOp:
+    case CK_AddressSpaceConversion: {
+      cgm.errorNYI(expr->getSourceRange(),
+                   "emitPointerWithAlignment: noop cast");
+      return Address::invalid();
+    }
+
+    // Array-to-pointer decay. TODO(cir): BaseInfo and TBAAInfo.
+    case CK_ArrayToPointerDecay: {
+      cgm.errorNYI(expr->getSourceRange(),
+                   "emitPointerWithAlignment: array-to-pointer decay");
+      return Address::invalid();
+    }
+
+    case CK_UncheckedDerivedToBase:
+    case CK_DerivedToBase: {
+      cgm.errorNYI(expr->getSourceRange(),
+                   "emitPointerWithAlignment: derived-to-base cast");
+      return Address::invalid();
+    }
+
+    case CK_AnyPointerToBlockPointerCast:
+    case CK_BaseToDerived:
+    case CK_BaseToDerivedMemberPointer:
+    case CK_BlockPointerToObjCPointerCast:
+    case CK_BuiltinFnToFnPtr:
+    case CK_CPointerToObjCPointerCast:
+    case CK_DerivedToBaseMemberPointer:
+    case CK_Dynamic:
+    case CK_FunctionToPointerDecay:
+    case CK_IntegralToPointer:
+    case CK_LValueToRValue:
+    case CK_LValueToRValueBitCast:
+    case CK_NullToMemberPointer:
+    case CK_NullToPointer:
+    case CK_ReinterpretMemberPointer:
+      // Common pointer conversions, nothing to do here.
+      // TODO: Is there any reason to treat base-to-derived conversions
+      // specially?
+      break;
+
+    case CK_ARCConsumeObject:
+    case CK_ARCExtendBlockObject:
+    case CK_ARCProduceObject:
+    case CK_ARCReclaimReturnedObject:
+    case CK_AtomicToNonAtomic:
+    case CK_BooleanToSignedIntegral:
+    case CK_ConstructorConversion:
+    case CK_CopyAndAutoreleaseBlockObject:
+    case CK_Dependent:
+    case CK_FixedPointCast:
+    case CK_FixedPointToBoolean:
+    case CK_FixedPointToFloating:
+    case CK_FixedPointToIntegral:
+    case CK_FloatingCast:
+    case CK_FloatingComplexCast:
+    case CK_FloatingComplexToBoolean:
+    case CK_FloatingComplexToIntegralComplex:
+    case CK_FloatingComplexToReal:
+    case CK_FloatingRealToComplex:
+    case CK_FloatingToBoolean:
+    case CK_FloatingToFixedPoint:
+    case CK_FloatingToIntegral:
+    case CK_HLSLAggregateSplatCast:
+    case CK_HLSLArrayRValue:
+    case CK_HLSLElementwiseCast:
+    case CK_HLSLVectorTruncation:
+    case CK_IntToOCLSampler:
+    case CK_IntegralCast:
+    case CK_IntegralComplexCast:
+    case CK_IntegralComplexToBoolean:
+    case CK_IntegralComplexToFloatingComplex:
+    case CK_IntegralComplexToReal:
+    case CK_IntegralRealToComplex:
+    case CK_IntegralToBoolean:
+    case CK_IntegralToFixedPoint:
+    case CK_IntegralToFloating:
+    case CK_LValueBitCast:
+    case CK_MatrixCast:
+    case CK_MemberPointerToBoolean:
+    case CK_NonAtomicToAtomic:
+    case CK_ObjCObjectLValueCast:
+    case CK_PointerToBoolean:
+    case CK_PointerToIntegral:
+    case CK_ToUnion:
+    case CK_ToVoid:
+    case CK_UserDefinedConversion:
+    case CK_VectorSplat:
+    case CK_ZeroToOCLOpaqueType:
+      llvm_unreachable("unexpected cast for emitPointerWithAlignment");
+    }
+  }
+
+  // Unary &
+  if (const auto *uo = dyn_cast<UnaryOperator>(expr)) {
+    // TODO(cir): maybe we should use cir.unary for pointers here instead.
+    if (uo->getOpcode() == UO_AddrOf) {
+      cgm.errorNYI(expr->getSourceRange(), "emitPointerWithAlignment: unary &");
+      return Address::invalid();
+    }
+  }
+
+  // std::addressof and variants.
+  if (const auto *call = dyn_cast<CallExpr>(expr)) {
+    switch (call->getBuiltinCallee()) {
+    default:
+      break;
+    case Builtin::BIaddressof:
+    case Builtin::BI__addressof:
+    case Builtin::BI__builtin_addressof: {
+      cgm.errorNYI(expr->getSourceRange(),
+                   "emitPointerWithAlignment: builtin addressof");
+      return Address::invalid();
+    }
+    }
+  }
+
+  // Otherwise, use the natural alignment of the pointee type.
+  return makeNaturalAddressForPointer(
+      emitScalarExpr(expr), expr->getType()->getPointeeType(), CharUnits());
+}
+
 void CIRGenFunction::emitStoreThroughLValue(RValue src, LValue dst,
                                             bool isInit) {
   if (!dst.isSimple()) {
@@ -193,8 +334,25 @@ LValue CIRGenFunction::emitUnaryOpLValue(const UnaryOperator *e) {
 
   switch (op) {
   case UO_Deref: {
-    cgm.errorNYI(e->getSourceRange(), "UnaryOp dereference");
-    return LValue();
+    QualType t = e->getSubExpr()->getType()->getPointeeType();
+    assert(!t.isNull() && "CodeGenFunction::EmitUnaryOpLValue: Illegal type");
+
+    assert(!cir::MissingFeatures::lvalueBaseInfo());
+    assert(!cir::MissingFeatures::opTBAA());
+    Address addr = emitPointerWithAlignment(e->getSubExpr());
+
+    // Tag 'load' with deref attribute.
+    // FIXME: This misses some dereference cases and has problematic
+    // interactions with other operators.
+    if (auto loadOp =
+            dyn_cast<cir::LoadOp>(addr.getPointer().getDefiningOp())) {
+      loadOp.setIsDerefAttr(mlir::UnitAttr::get(&getMLIRContext()));
+    }
+
+    LValue lv = LValue::makeAddr(addr, t);
+    assert(!cir::MissingFeatures::addressSpace());
+    assert(!cir::MissingFeatures::setNonGC());
+    return lv;
   }
   case UO_Real:
   case UO_Imag: {
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
index ed49f39..4042f5d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -161,6 +161,11 @@ public:
     return VisitCastExpr(e);
   }
 
+  mlir::Value VisitCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *e) {
+    mlir::Location loc = cgf.getLoc(e->getSourceRange());
+    return cgf.cgm.emitNullConstant(e->getType(), loc);
+  }
+
   /// Perform a pointer to boolean conversion.
   mlir::Value emitPointerToBoolConversion(mlir::Value v, QualType qt) {
     // TODO(cir): comparing the ptr to null is done when lowering CIR to LLVM.
@@ -444,6 +449,22 @@ public:
     llvm_unreachable("Unexpected signed overflow behavior kind");
   }
 
+  mlir::Value VisitUnaryAddrOf(const UnaryOperator *e) {
+    // Ordinary address-of: the result is the pointer of the operand's lvalue.
+    if (!llvm::isa<MemberPointerType>(e->getType()))
+      return cgf.emitLValue(e->getSubExpr()).getPointer();
+    // Member pointers are not handled yet: diagnose and return a null pointer.
+    cgf.cgm.errorNYI(e->getSourceRange(), "Address of member pointer");
+    return builder.getNullPtr(cgf.convertType(e->getType()),
+                              cgf.getLoc(e->getExprLoc()));
+  }
+
+  mlir::Value VisitUnaryDeref(const UnaryOperator *e) {
+    // For a void result only the operand is visited; the value should be unused.
+    const bool yieldsVoid = e->getType()->isVoidType();
+    return yieldsVoid ? Visit(e->getSubExpr()) : emitLoadOfLValue(e);
+  }
+
   mlir::Value VisitUnaryPlus(const UnaryOperator *e) {
     return emitUnaryPlusOrMinus(e, cir::UnaryOpKind::Plus);
   }
@@ -937,9 +958,11 @@ mlir::Value CIRGenFunction::emitPromotedScalarExpr(const Expr *e,
 }
 
 [[maybe_unused]] static bool mustVisitNullValue(const Expr *e) {
-  // If a null pointer expression's type is the C++0x nullptr_t, then
-  // it's not necessarily a simple constant and it must be evaluated
+  // If a null pointer expression's type is the C++0x nullptr_t and
+  // the expression is not a simple literal, it must be evaluated
   // for its potential side effects.
+  if (isa<IntegerLiteral>(e) || isa<CXXNullPtrLiteralExpr>(e))
+    return false;
   return e->getType()->isNullPtrType();
 }
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index f505ed8..dde665a 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -222,6 +222,17 @@ public:
     // TODO: Add symbol table support
   }
 
+  /// Wrap \p ptr in an Address whose element type is the in-memory form of
+  /// \p t. A zero \p alignment means "unspecified": the natural alignment of
+  /// \p t, as reported by cgm.getNaturalTypeAlignment, is used instead. Any
+  /// non-zero \p alignment is passed through unchanged.
+  Address makeNaturalAddressForPointer(mlir::Value ptr, QualType t,
+                                       CharUnits alignment) {
+    const CharUnits effectiveAlign =
+        alignment.isZero() ? cgm.getNaturalTypeAlignment(t) : alignment;
+    return Address(ptr, convertTypeForMem(t), effectiveAlign);
+  }
+
   cir::FuncOp generateCode(clang::GlobalDecl gd, cir::FuncOp fn,
                            cir::FuncType funcType);
 
@@ -468,6 +479,18 @@ public:
   /// FIXME: document this function better.
   LValue emitLValue(const clang::Expr *e);
 
+  /// Given an expression with a pointer type, emit the value and compute our
+  /// best estimate of the alignment of the pointee.
+  ///
+  /// One reasonable way to use this information is when there's a language
+  /// guarantee that the pointer must be aligned to some stricter value, and
+  /// we're simply trying to ensure that sufficiently obvious uses of under-
+  /// aligned objects don't get miscompiled; for example, a placement new
+  /// into the address of a local variable.  In such a case, it's quite
+  /// reasonable to just ignore the returned alignment when it isn't from an
+  /// explicit source.
+  Address emitPointerWithAlignment(const clang::Expr *expr);
+
   mlir::LogicalResult emitReturnStmt(const clang::ReturnStmt &s);
 
   /// Emit a conversion from the specified type to the specified destination
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index f0e9b03..78d995b 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -74,6 +74,57 @@ CIRGenModule::CIRGenModule(mlir::MLIRContext &mlirContext,
                      builder.getStringAttr(getTriple().str()));
 }
 
+CharUnits CIRGenModule::getNaturalTypeAlignment(QualType t) {
+  assert(!cir::MissingFeatures::opTBAA());
+  // Returns the alignment, in chars, to assume for an object of type `t`.
+  // FIXME: This duplicates logic in ASTContext::getTypeAlignIfKnown. But
+  // that doesn't return the information we need to compute BaseInfo.
+
+  // Honor alignment typedef attributes even on incomplete types.
+  // We also honor them straight for C++ class types, even as pointees;
+  // there's an expressivity gap here.
+  if (const auto *tt = t->getAs<TypedefType>()) {
+    if (unsigned align = tt->getDecl()->getMaxAlignment()) {
+      assert(!cir::MissingFeatures::lvalueBaseInfo());
+      return astContext.toCharUnitsFromBits(align);
+    }
+  }
+
+  // Analyze the base element type, so we don't get confused by incomplete
+  // array types.
+  t = astContext.getBaseElementType(t);
+
+  if (t->isIncompleteType()) {
+    // We could try to replicate the logic from
+    // ASTContext::getTypeAlignIfKnown, but nothing uses the alignment if the
+    // type is incomplete, so it's impossible to test. We could try to reuse
+    // getTypeAlignIfKnown, but that doesn't return the information we need
+    // to set BaseInfo.  So just ignore the possibility that the alignment is
+    // greater than one.
+    assert(!cir::MissingFeatures::lvalueBaseInfo());
+    return CharUnits::One();
+  }
+
+  assert(!cir::MissingFeatures::lvalueBaseInfo());
+  // Unaligned-qualified types get one; others use the AST's alignment in chars.
+  CharUnits alignment;
+  if (t.getQualifiers().hasUnaligned()) {
+    alignment = CharUnits::One();
+  } else {
+    assert(!cir::MissingFeatures::alignCXXRecordDecl());
+    alignment = astContext.getTypeAlignInChars(t);
+  }
+
+  // Cap to the global maximum type alignment (MaxTypeAlign; zero disables the
+  // cap) unless the alignment was somehow explicit on the type.
+  if (unsigned maxAlign = astContext.getLangOpts().MaxTypeAlign) {
+    if (alignment.getQuantity() > maxAlign &&
+        !astContext.isAlignmentRequired(t))
+      alignment = CharUnits::fromQuantity(maxAlign);
+  }
+  return alignment;
+}
+
 mlir::Location CIRGenModule::getLoc(SourceLocation cLoc) {
   assert(cLoc.isValid() && "expected valid source location");
   const SourceManager &sm = astContext.getSourceManager();
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h
index ab4545e..66e9faa 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.h
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.h
@@ -89,6 +89,10 @@ public:
   mlir::Location getLoc(clang::SourceLocation cLoc);
   mlir::Location getLoc(clang::SourceRange cRange);
 
+  /// FIXME: this could likely be a common helper and not necessarily related
+  /// with codegen.
+  clang::CharUnits getNaturalTypeAlignment(clang::QualType t);
+
   void emitTopLevelDecl(clang::Decl *decl);
 
   bool verifyModule() const;
diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
index 1e47ccc..68aee63 100644
--- a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
@@ -183,6 +183,14 @@ mlir::Type CIRGenTypes::convertType(QualType type) {
       resultType = cgm.SInt32Ty;
       break;
 
+    case BuiltinType::NullPtr:
+      // Add proper CIR type for it? this looks mostly useful for sema related
+      // things (like for overloads accepting void), for now, given that
+      // `sizeof(std::nullptr_t)` is equal to `sizeof(void *)`, model
+      // std::nullptr_t as !cir.ptr<!void>
+      resultType = builder.getVoidPtrTy();
+      break;
+
     default:
       cgm.errorNYI(SourceLocation(), "processing of built-in type", type);
       resultType = cgm.SInt32Ty;
diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.h b/clang/lib/CIR/CodeGen/CIRGenTypes.h
index 73948f5..4021206 100644
--- a/clang/lib/CIR/CodeGen/CIRGenTypes.h
+++ b/clang/lib/CIR/CodeGen/CIRGenTypes.h
@@ -74,7 +74,7 @@ public:
 
   /// Return whether a type can be zero-initialized (in the C++ sense) with an
   /// LLVM zeroinitializer.
-  bool isZeroInitializable(clang::QualType t);
+  bool isZeroInitializable(clang::QualType ty);
 };
 
 } // namespace clang::CIRGen
diff --git a/clang/lib/CIR/CodeGen/CIRGenValue.h b/clang/lib/CIR/CodeGen/CIRGenValue.h
index d22d518..68aecc6 100644
--- a/clang/lib/CIR/CodeGen/CIRGenValue.h
+++ b/clang/lib/CIR/CodeGen/CIRGenValue.h
@@ -91,6 +91,7 @@ class LValue {
   mlir::Type elementType;
 
   void initialize(clang::QualType type, clang::Qualifiers quals) {
+    assert(!cir::MissingFeatures::lvalueBaseInfo());
     this->type = type;
     this->quals = quals;
   }
@@ -123,6 +124,7 @@ public:
     r.v = address.getPointer();
     r.elementType = address.getElementType();
     r.initialize(t, t.getQualifiers());
+    assert(!cir::MissingFeatures::lvalueBaseInfo());
     return r;
   }
 };
author	Andy Kaylor <akaylor@nvidia.com>	2025-04-08 10:32:03 -0700
committer	GitHub <noreply@github.com>	2025-04-08 10:32:03 -0700
commit	4928093a21cea9bd76d1e47455e990874ad352df (patch)
tree	84552dc868642a1b67ff52927ce8b3964cd75d05 /clang/lib/CIR/CodeGen
parent	9bfb4b8fb194c1df5b082888abc03d095e39b6e9 (diff)
download	llvm-4928093a21cea9bd76d1e47455e990874ad352df.zip llvm-4928093a21cea9bd76d1e47455e990874ad352df.tar.gz llvm-4928093a21cea9bd76d1e47455e990874ad352df.tar.bz2