Diffstat (limited to 'clang/lib/CIR/CodeGen')
41 files changed, 7047 insertions, 641 deletions
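To orient the reader before the per-file hunks: the largest additions below are the dynamic-memory-order handling in CIRGenAtomic.cpp and the greatly expanded builtin coverage in CIRGenBuiltin.cpp. A minimal C sketch of the kind of source that exercises the new emitAtomicExprWithDynamicMemOrder path (illustrative only; not taken from this patch or its tests):

/* A memory order that is not a compile-time constant forces the switch-based
 * lowering added in CIRGenAtomic.cpp below: the runtime value is mapped onto
 * hard-coded memory orders, with unsupported values falling back to relaxed. */
#include <stdatomic.h>

int load_with_runtime_order(_Atomic int *p, memory_order order) {
  return atomic_load_explicit(p, order); /* `order` is only known at runtime */
}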
diff --git a/clang/lib/CIR/CodeGen/Address.h b/clang/lib/CIR/CodeGen/Address.h index a67cbad..a425eeb 100644 --- a/clang/lib/CIR/CodeGen/Address.h +++ b/clang/lib/CIR/CodeGen/Address.h @@ -16,9 +16,11 @@ #include "mlir/IR/Value.h" #include "clang/AST/CharUnits.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" #include "clang/CIR/Dialect/IR/CIRTypes.h" #include "clang/CIR/MissingFeatures.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/Support/Casting.h" namespace clang::CIRGen { @@ -75,6 +77,12 @@ public: return Address(newPtr, getElementType(), getAlignment()); } + /// Return address with different alignment, but same pointer and element + /// type. + Address withAlignment(clang::CharUnits newAlignment) const { + return Address(getPointer(), getElementType(), newAlignment); + } + /// Return address with different element type, a bitcast pointer, and /// the same alignment. Address withElementType(CIRGenBuilderTy &builder, mlir::Type ElemTy) const; @@ -114,6 +122,11 @@ public: return elementType; } + cir::TargetAddressSpaceAttr getAddressSpace() const { + auto ptrTy = mlir::dyn_cast<cir::PointerType>(getType()); + return ptrTy.getAddrSpace(); + } + clang::CharUnits getAlignment() const { return alignment; } /// Get the operation which defines this address. diff --git a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp index 7db6e28..4c94db5 100644 --- a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp @@ -27,6 +27,7 @@ class AtomicInfo { CharUnits atomicAlign; CharUnits valueAlign; TypeEvaluationKind evaluationKind = cir::TEK_Scalar; + bool useLibCall = true; LValue lvalue; mlir::Location loc; @@ -62,8 +63,8 @@ public: assert(!cir::MissingFeatures::atomicInfo()); cgf.cgm.errorNYI(loc, "AtomicInfo: non-simple lvalue"); } - - assert(!cir::MissingFeatures::atomicUseLibCall()); + useLibCall = !ctx.getTargetInfo().hasBuiltinAtomic( + atomicSizeInBits, ctx.toBits(lvalue.getAlignment())); } QualType getValueType() const { return valueTy; } @@ -75,6 +76,8 @@ public: assert(!cir::MissingFeatures::atomicInfoGetAtomicPointer()); return nullptr; } + bool shouldUseLibCall() const { return useLibCall; } + const LValue &getAtomicLValue() const { return lvalue; } Address getAtomicAddress() const { mlir::Type elemTy; if (lvalue.isSimple()) { @@ -96,6 +99,8 @@ public: bool emitMemSetZeroIfNecessary() const; + mlir::Value getScalarRValValueOrNull(RValue rvalue) const; + /// Cast the given pointer to an integer pointer suitable for atomic /// operations on the source. Address castToAtomicIntPointer(Address addr) const; @@ -105,6 +110,9 @@ public: /// copy the value across. Address convertToAtomicIntPointer(Address addr) const; + /// Converts a rvalue to integer value. + mlir::Value convertRValueToInt(RValue rvalue, bool cmpxchg = false) const; + /// Copy an atomic r-value into atomic-layout memory. void emitCopyIntoMemory(RValue rvalue) const; @@ -195,6 +203,12 @@ Address AtomicInfo::createTempAlloca() const { return tempAlloca; } +mlir::Value AtomicInfo::getScalarRValValueOrNull(RValue rvalue) const { + if (rvalue.isScalar() && (!hasPadding() || !lvalue.isSimple())) + return rvalue.getValue(); + return nullptr; +} + Address AtomicInfo::castToAtomicIntPointer(Address addr) const { auto intTy = mlir::dyn_cast<cir::IntType>(addr.getElementType()); // Don't bother with int casts if the integer size is the same. 
@@ -211,10 +225,38 @@ bool AtomicInfo::emitMemSetZeroIfNecessary() const {
     return false;
 
   cgf.cgm.errorNYI(loc,
-                   "AtomicInfo::emitMemSetZeroIfNecaessary: emit memset zero");
+                   "AtomicInfo::emitMemSetZeroIfNecessary: emit memset zero");
   return false;
 }
 
+/// Return true if \param valueTy is a type that should be casted to integer
+/// around the atomic memory operation. If \param cmpxchg is true, then the
+/// cast of a floating point type is made as that instruction can not have
+/// floating point operands. TODO: Allow compare-and-exchange and FP - see
+/// comment in CIRGenAtomicExpandPass.cpp.
+static bool shouldCastToInt(mlir::Type valueTy, bool cmpxchg) {
+  if (cir::isAnyFloatingPointType(valueTy))
+    return isa<cir::FP80Type>(valueTy) || cmpxchg;
+  return !isa<cir::IntType>(valueTy) && !isa<cir::PointerType>(valueTy);
+}
+
+mlir::Value AtomicInfo::convertRValueToInt(RValue rvalue, bool cmpxchg) const {
+  // If we've got a scalar value of the right size, try to avoid going
+  // through memory. Floats get casted if needed by AtomicExpandPass.
+  if (mlir::Value value = getScalarRValValueOrNull(rvalue)) {
+    if (!shouldCastToInt(value.getType(), cmpxchg))
+      return cgf.emitToMemory(value, valueTy);
+
+    cgf.cgm.errorNYI(
+        loc, "AtomicInfo::convertRValueToInt: cast scalar rvalue to int");
+    return nullptr;
+  }
+
+  cgf.cgm.errorNYI(
+      loc, "AtomicInfo::convertRValueToInt: cast non-scalar rvalue to int");
+  return nullptr;
+}
+
 /// Copy an r-value into memory as part of storing to an atomic type.
 /// This needs to create a bit-pattern suitable for atomic operations.
 void AtomicInfo::emitCopyIntoMemory(RValue rvalue) const {
@@ -244,6 +286,31 @@ void AtomicInfo::emitCopyIntoMemory(RValue rvalue) const {
   }
 }
 
+static void emitMemOrderDefaultCaseLabel(CIRGenBuilderTy &builder,
+                                         mlir::Location loc) {
+  mlir::ArrayAttr ordersAttr = builder.getArrayAttr({});
+  mlir::OpBuilder::InsertPoint insertPoint;
+  cir::CaseOp::create(builder, loc, ordersAttr, cir::CaseOpKind::Default,
+                      insertPoint);
+  builder.restoreInsertionPoint(insertPoint);
+}
+
+// Create a "case" operation with the given list of orders as its values. Also
+// create the region that will hold the body of the switch-case label.
+static void emitMemOrderCaseLabel(CIRGenBuilderTy &builder, mlir::Location loc, + mlir::Type orderType, + llvm::ArrayRef<cir::MemOrder> orders) { + llvm::SmallVector<mlir::Attribute, 2> orderAttrs; + for (cir::MemOrder order : orders) + orderAttrs.push_back(cir::IntAttr::get(orderType, static_cast<int>(order))); + mlir::ArrayAttr ordersAttr = builder.getArrayAttr(orderAttrs); + + mlir::OpBuilder::InsertPoint insertPoint; + cir::CaseOp::create(builder, loc, ordersAttr, cir::CaseOpKind::Anyof, + insertPoint); + builder.restoreInsertionPoint(insertPoint); +} + static void emitAtomicCmpXchg(CIRGenFunction &cgf, AtomicExpr *e, bool isWeak, Address dest, Address ptr, Address val1, Address val2, uint64_t size, @@ -577,6 +644,9 @@ static void emitAtomicOp(CIRGenFunction &cgf, AtomicExpr *expr, Address dest, case AtomicExpr::AO__scoped_atomic_nand_fetch: case AtomicExpr::AO__scoped_atomic_fetch_nand: + + case AtomicExpr::AO__scoped_atomic_uinc_wrap: + case AtomicExpr::AO__scoped_atomic_udec_wrap: cgf.cgm.errorNYI(expr->getSourceRange(), "emitAtomicOp: expr op NYI"); return; } @@ -615,6 +685,70 @@ static bool isMemOrderValid(uint64_t order, bool isStore, bool isLoad) { return true; } +static void emitAtomicExprWithDynamicMemOrder( + CIRGenFunction &cgf, mlir::Value order, AtomicExpr *e, Address dest, + Address ptr, Address val1, Address val2, Expr *isWeakExpr, + Expr *orderFailExpr, uint64_t size, bool isStore, bool isLoad) { + // The memory order is not known at compile-time. The atomic operations + // can't handle runtime memory orders; the memory order must be hard coded. + // Generate a "switch" statement that converts a runtime value into a + // compile-time value. + CIRGenBuilderTy &builder = cgf.getBuilder(); + cir::SwitchOp::create( + builder, order.getLoc(), order, + [&](mlir::OpBuilder &, mlir::Location loc, mlir::OperationState &) { + mlir::Block *switchBlock = builder.getBlock(); + + auto emitMemOrderCase = [&](llvm::ArrayRef<cir::MemOrder> caseOrders, + cir::MemOrder actualOrder) { + if (caseOrders.empty()) + emitMemOrderDefaultCaseLabel(builder, loc); + else + emitMemOrderCaseLabel(builder, loc, order.getType(), caseOrders); + emitAtomicOp(cgf, e, dest, ptr, val1, val2, isWeakExpr, orderFailExpr, + size, actualOrder); + builder.createBreak(loc); + builder.setInsertionPointToEnd(switchBlock); + }; + + // default: + // Use memory_order_relaxed for relaxed operations and for any memory + // order value that is not supported. There is no good way to report + // an unsupported memory order at runtime, hence the fallback to + // memory_order_relaxed. + emitMemOrderCase(/*caseOrders=*/{}, cir::MemOrder::Relaxed); + + if (!isStore) { + // case consume: + // case acquire: + // memory_order_consume is not implemented; it is always treated + // like memory_order_acquire. These memory orders are not valid for + // write-only operations. + emitMemOrderCase({cir::MemOrder::Consume, cir::MemOrder::Acquire}, + cir::MemOrder::Acquire); + } + + if (!isLoad) { + // case release: + // memory_order_release is not valid for read-only operations. + emitMemOrderCase({cir::MemOrder::Release}, cir::MemOrder::Release); + } + + if (!isLoad && !isStore) { + // case acq_rel: + // memory_order_acq_rel is only valid for read-write operations. 
+ emitMemOrderCase({cir::MemOrder::AcquireRelease}, + cir::MemOrder::AcquireRelease); + } + + // case seq_cst: + emitMemOrderCase({cir::MemOrder::SequentiallyConsistent}, + cir::MemOrder::SequentiallyConsistent); + + builder.createYield(loc); + }); +} + RValue CIRGenFunction::emitAtomicExpr(AtomicExpr *e) { QualType atomicTy = e->getPtr()->getType()->getPointeeType(); QualType memTy = atomicTy; @@ -802,9 +936,9 @@ RValue CIRGenFunction::emitAtomicExpr(AtomicExpr *e) { emitAtomicOp(*this, e, dest, ptr, val1, val2, isWeakExpr, orderFailExpr, size, static_cast<cir::MemOrder>(ord)); } else { - assert(!cir::MissingFeatures::atomicExpr()); - cgm.errorNYI(e->getSourceRange(), "emitAtomicExpr: dynamic memory order"); - return RValue::get(nullptr); + emitAtomicExprWithDynamicMemOrder(*this, order, e, dest, ptr, val1, val2, + isWeakExpr, orderFailExpr, size, isStore, + isLoad); } if (resultTy->isVoidType()) @@ -815,6 +949,79 @@ RValue CIRGenFunction::emitAtomicExpr(AtomicExpr *e) { e->getExprLoc()); } +void CIRGenFunction::emitAtomicStore(RValue rvalue, LValue dest, bool isInit) { + bool isVolatile = dest.isVolatileQualified(); + auto order = cir::MemOrder::SequentiallyConsistent; + if (!dest.getType()->isAtomicType()) { + assert(!cir::MissingFeatures::atomicMicrosoftVolatile()); + } + return emitAtomicStore(rvalue, dest, order, isVolatile, isInit); +} + +/// Emit a store to an l-value of atomic type. +/// +/// Note that the r-value is expected to be an r-value of the atomic type; this +/// means that for aggregate r-values, it should include storage for any padding +/// that was necessary. +void CIRGenFunction::emitAtomicStore(RValue rvalue, LValue dest, + cir::MemOrder order, bool isVolatile, + bool isInit) { + // If this is an aggregate r-value, it should agree in type except + // maybe for address-space qualification. + mlir::Location loc = dest.getPointer().getLoc(); + assert(!rvalue.isAggregate() || + rvalue.getAggregateAddress().getElementType() == + dest.getAddress().getElementType()); + + AtomicInfo atomics(*this, dest, loc); + LValue lvalue = atomics.getAtomicLValue(); + + if (lvalue.isSimple()) { + // If this is an initialization, just put the value there normally. + if (isInit) { + atomics.emitCopyIntoMemory(rvalue); + return; + } + + // Check whether we should use a library call. + if (atomics.shouldUseLibCall()) { + assert(!cir::MissingFeatures::atomicUseLibCall()); + cgm.errorNYI(loc, "emitAtomicStore: atomic store with library call"); + return; + } + + // Okay, we're doing this natively. + mlir::Value valueToStore = atomics.convertRValueToInt(rvalue); + + // Do the atomic store. + Address addr = atomics.getAtomicAddress(); + if (mlir::Value value = atomics.getScalarRValValueOrNull(rvalue)) { + if (shouldCastToInt(value.getType(), /*CmpXchg=*/false)) { + addr = atomics.castToAtomicIntPointer(addr); + valueToStore = + builder.createIntCast(valueToStore, addr.getElementType()); + } + } + cir::StoreOp store = builder.createStore(loc, valueToStore, addr); + + // Initializations don't need to be atomic. + if (!isInit) { + assert(!cir::MissingFeatures::atomicOpenMP()); + store.setMemOrder(order); + } + + // Other decoration. 
+ if (isVolatile) + store.setIsVolatile(true); + + assert(!cir::MissingFeatures::opLoadStoreTbaa()); + return; + } + + cgm.errorNYI(loc, "emitAtomicStore: non-simple atomic lvalue"); + assert(!cir::MissingFeatures::opLoadStoreAtomic()); +} + void CIRGenFunction::emitAtomicInit(Expr *init, LValue dest) { AtomicInfo atomics(*this, dest, getLoc(init->getSourceRange())); diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h index e5066fa..bf13eee 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h +++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h @@ -189,6 +189,11 @@ public: return getType<cir::RecordType>(nameAttr, kind); } + cir::DataMemberAttr getDataMemberAttr(cir::DataMemberType ty, + unsigned memberIndex) { + return cir::DataMemberAttr::get(ty, memberIndex); + } + // Return true if the value is a null constant such as null pointer, (+0.0) // for floating-point or zero initializer bool isNullValue(mlir::Attribute attr) const { @@ -212,6 +217,16 @@ public: &ignored); return fv.bitwiseIsEqual(fpVal); } + if (const auto recordVal = mlir::dyn_cast<cir::ConstRecordAttr>(attr)) { + for (const auto elt : recordVal.getMembers()) { + // FIXME(cir): the record's ID should not be considered a member. + if (mlir::isa<mlir::StringAttr>(elt)) + continue; + if (!isNullValue(elt)) + return false; + } + return true; + } if (const auto arrayVal = mlir::dyn_cast<cir::ConstArrayAttr>(attr)) { if (mlir::isa<mlir::StringAttr>(arrayVal.getElts())) @@ -315,8 +330,10 @@ public: return getConstantInt(loc, getUInt32Ty(), c); } cir::ConstantOp getSInt64(uint64_t c, mlir::Location loc) { - cir::IntType sInt64Ty = getSInt64Ty(); - return cir::ConstantOp::create(*this, loc, cir::IntAttr::get(sInt64Ty, c)); + return getConstantInt(loc, getSInt64Ty(), c); + } + cir::ConstantOp getUInt64(uint64_t c, mlir::Location loc) { + return getConstantInt(loc, getUInt64Ty(), c); } mlir::Value createNeg(mlir::Value value) { @@ -332,6 +349,11 @@ public: llvm_unreachable("negation for the given type is NYI"); } + cir::IsFPClassOp createIsFPClass(mlir::Location loc, mlir::Value src, + cir::FPClassTest flags) { + return cir::IsFPClassOp::create(*this, loc, src, flags); + } + // TODO: split this to createFPExt/createFPTrunc when we have dedicated cast // operations. 
mlir::Value createFloatingCast(mlir::Value v, mlir::Type destType) { @@ -403,6 +425,19 @@ public: return Address(baseAddr, destType, addr.getAlignment()); } + Address createDerivedClassAddr(mlir::Location loc, Address addr, + mlir::Type destType, unsigned offset, + bool assumeNotNull) { + if (destType == addr.getElementType()) + return addr; + + cir::PointerType ptrTy = getPointerTo(destType); + auto derivedAddr = + cir::DerivedClassAddrOp::create(*this, loc, ptrTy, addr.getPointer(), + mlir::APInt(64, offset), assumeNotNull); + return Address(derivedAddr, destType, addr.getAlignment()); + } + mlir::Value createVTTAddrPoint(mlir::Location loc, mlir::Type retTy, mlir::Value addr, uint64_t offset) { return cir::VTTAddrPointOp::create(*this, loc, retTy, @@ -572,6 +607,35 @@ public: info.isSigned, isLvalueVolatile, addr.getAlignment().getAsAlign().value()); } + + cir::VecShuffleOp + createVecShuffle(mlir::Location loc, mlir::Value vec1, mlir::Value vec2, + llvm::ArrayRef<mlir::Attribute> maskAttrs) { + auto vecType = mlir::cast<cir::VectorType>(vec1.getType()); + auto resultTy = cir::VectorType::get(getContext(), vecType.getElementType(), + maskAttrs.size()); + return cir::VecShuffleOp::create(*this, loc, resultTy, vec1, vec2, + getArrayAttr(maskAttrs)); + } + + cir::VecShuffleOp createVecShuffle(mlir::Location loc, mlir::Value vec1, + mlir::Value vec2, + llvm::ArrayRef<int64_t> mask) { + auto maskAttrs = llvm::to_vector_of<mlir::Attribute>( + llvm::map_range(mask, [&](int32_t idx) { + return cir::IntAttr::get(getSInt32Ty(), idx); + })); + return createVecShuffle(loc, vec1, vec2, maskAttrs); + } + + cir::VecShuffleOp createVecShuffle(mlir::Location loc, mlir::Value vec1, + llvm::ArrayRef<int64_t> mask) { + /// Create a unary shuffle. The second vector operand of the IR instruction + /// is poison. + cir::ConstantOp poison = + getConstant(loc, cir::PoisonAttr::get(vec1.getType())); + return createVecShuffle(loc, vec1, poison, mask); + } }; } // namespace clang::CIRGen diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index e35100f..16c006d 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -12,16 +12,18 @@ //===----------------------------------------------------------------------===// #include "CIRGenCall.h" -#include "CIRGenConstantEmitter.h" #include "CIRGenFunction.h" #include "CIRGenModule.h" #include "CIRGenValue.h" #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/Value.h" #include "mlir/Support/LLVM.h" +#include "clang/AST/DeclBase.h" #include "clang/AST/Expr.h" #include "clang/AST/GlobalDecl.h" #include "clang/Basic/Builtins.h" +#include "clang/Basic/OperatorKinds.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" #include "clang/CIR/MissingFeatures.h" #include "llvm/Support/ErrorHandling.h" @@ -58,6 +60,72 @@ static RValue emitBuiltinBitOp(CIRGenFunction &cgf, const CallExpr *e, return RValue::get(result); } +static mlir::Value makeAtomicFenceValue(CIRGenFunction &cgf, + const CallExpr *expr, + cir::SyncScopeKind syncScope) { + CIRGenBuilderTy &builder = cgf.getBuilder(); + mlir::Value orderingVal = cgf.emitScalarExpr(expr->getArg(0)); + + auto constOrdering = orderingVal.getDefiningOp<cir::ConstantOp>(); + + if (!constOrdering) { + // TODO(cir): Emit code to switch on `orderingVal`, + // and creating the fence op for valid values. 
+ cgf.cgm.errorNYI("Variable atomic fence ordering"); + return {}; + } + + auto constOrderingAttr = constOrdering.getValueAttr<cir::IntAttr>(); + assert(constOrderingAttr && "Expected integer constant for ordering"); + + auto ordering = static_cast<cir::MemOrder>(constOrderingAttr.getUInt()); + + cir::AtomicFence::create( + builder, cgf.getLoc(expr->getSourceRange()), ordering, + cir::SyncScopeKindAttr::get(&cgf.getMLIRContext(), syncScope)); + + return {}; +} + +namespace { +struct WidthAndSignedness { + unsigned width; + bool isSigned; +}; +} // namespace + +static WidthAndSignedness +getIntegerWidthAndSignedness(const clang::ASTContext &astContext, + const clang::QualType type) { + assert(type->isIntegerType() && "Given type is not an integer."); + unsigned width = type->isBooleanType() ? 1 + : type->isBitIntType() ? astContext.getIntWidth(type) + : astContext.getTypeInfo(type).Width; + bool isSigned = type->isSignedIntegerType(); + return {width, isSigned}; +} + +// Given one or more integer types, this function produces an integer type that +// encompasses them: any value in one of the given types could be expressed in +// the encompassing type. +static struct WidthAndSignedness +EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> types) { + assert(types.size() > 0 && "Empty list of types."); + + // If any of the given types is signed, we must return a signed type. + bool isSigned = llvm::any_of(types, [](const auto &t) { return t.isSigned; }); + + // The encompassing type must have a width greater than or equal to the width + // of the specified types. Additionally, if the encompassing type is signed, + // its width must be strictly greater than the width of any unsigned types + // given. + unsigned width = 0; + for (const auto &type : types) + width = std::max(width, type.width + (isSigned && !type.isSigned)); + + return {width, isSigned}; +} + RValue CIRGenFunction::emitRotate(const CallExpr *e, bool isRotateLeft) { mlir::Value input = emitScalarExpr(e->getArg(0)); mlir::Value amount = emitScalarExpr(e->getArg(1)); @@ -93,6 +161,83 @@ static RValue emitUnaryFPBuiltin(CIRGenFunction &cgf, const CallExpr &e) { return RValue::get(call->getResult(0)); } +static RValue errorBuiltinNYI(CIRGenFunction &cgf, const CallExpr *e, + unsigned builtinID) { + + if (cgf.getContext().BuiltinInfo.isLibFunction(builtinID)) { + cgf.cgm.errorNYI( + e->getSourceRange(), + std::string("unimplemented X86 library function builtin call: ") + + cgf.getContext().BuiltinInfo.getName(builtinID)); + } else { + cgf.cgm.errorNYI(e->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + cgf.getContext().BuiltinInfo.getName(builtinID)); + } + + return cgf.getUndefRValue(e->getType()); +} + +static RValue emitBuiltinAlloca(CIRGenFunction &cgf, const CallExpr *e, + unsigned builtinID) { + assert(builtinID == Builtin::BI__builtin_alloca || + builtinID == Builtin::BI__builtin_alloca_uninitialized || + builtinID == Builtin::BIalloca || builtinID == Builtin::BI_alloca); + + // Get alloca size input + mlir::Value size = cgf.emitScalarExpr(e->getArg(0)); + + // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__. + const TargetInfo &ti = cgf.getContext().getTargetInfo(); + const CharUnits suitableAlignmentInBytes = + cgf.getContext().toCharUnitsFromBits(ti.getSuitableAlign()); + + // Emit the alloca op with type `u8 *` to match the semantics of + // `llvm.alloca`. 
We later bitcast the type to `void *` to match the + // semantics of C/C++ + // FIXME(cir): It may make sense to allow AllocaOp of type `u8` to return a + // pointer of type `void *`. This will require a change to the allocaOp + // verifier. + CIRGenBuilderTy &builder = cgf.getBuilder(); + mlir::Value allocaAddr = builder.createAlloca( + cgf.getLoc(e->getSourceRange()), builder.getUInt8PtrTy(), + builder.getUInt8Ty(), "bi_alloca", suitableAlignmentInBytes, size); + + // Initialize the allocated buffer if required. + if (builtinID != Builtin::BI__builtin_alloca_uninitialized) { + // Initialize the alloca with the given size and alignment according to + // the lang opts. Only the trivial non-initialization is supported for + // now. + + switch (cgf.getLangOpts().getTrivialAutoVarInit()) { + case LangOptions::TrivialAutoVarInitKind::Uninitialized: + // Nothing to initialize. + break; + case LangOptions::TrivialAutoVarInitKind::Zero: + case LangOptions::TrivialAutoVarInitKind::Pattern: + cgf.cgm.errorNYI("trivial auto var init"); + break; + } + } + + // An alloca will always return a pointer to the alloca (stack) address + // space. This address space need not be the same as the AST / Language + // default (e.g. in C / C++ auto vars are in the generic address space). At + // the AST level this is handled within CreateTempAlloca et al., but for the + // builtin / dynamic alloca we have to handle it here. + + if (!cir::isMatchingAddressSpace( + cgf.getCIRAllocaAddressSpace(), + e->getType()->getPointeeType().getAddressSpace())) { + cgf.cgm.errorNYI(e->getSourceRange(), + "Non-default address space for alloca"); + } + + // Bitcast the alloca to the expected type. + return RValue::get(builder.createBitcast( + allocaAddr, builder.getVoidPtrTy(cgf.getCIRAllocaAddressSpace()))); +} + RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, const CallExpr *e, ReturnValueSlot returnValue) { @@ -148,58 +293,12 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, case Builtin::BI__builtin_va_end: emitVAEnd(emitVAListRef(e->getArg(0)).getPointer()); return {}; - - case Builtin::BIalloca: - case Builtin::BI_alloca: - case Builtin::BI__builtin_alloca_uninitialized: - case Builtin::BI__builtin_alloca: { - // Get alloca size input - mlir::Value size = emitScalarExpr(e->getArg(0)); - - // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__. - const TargetInfo &ti = getContext().getTargetInfo(); - const CharUnits suitableAlignmentInBytes = - getContext().toCharUnitsFromBits(ti.getSuitableAlign()); - - // Emit the alloca op with type `u8 *` to match the semantics of - // `llvm.alloca`. We later bitcast the type to `void *` to match the - // semantics of C/C++ - // FIXME(cir): It may make sense to allow AllocaOp of type `u8` to return a - // pointer of type `void *`. This will require a change to the allocaOp - // verifier. - mlir::Value allocaAddr = builder.createAlloca( - getLoc(e->getSourceRange()), builder.getUInt8PtrTy(), - builder.getUInt8Ty(), "bi_alloca", suitableAlignmentInBytes, size); - - // Initialize the allocated buffer if required. - if (builtinID != Builtin::BI__builtin_alloca_uninitialized) { - // Initialize the alloca with the given size and alignment according to - // the lang opts. Only the trivial non-initialization is supported for - // now. - - switch (getLangOpts().getTrivialAutoVarInit()) { - case LangOptions::TrivialAutoVarInitKind::Uninitialized: - // Nothing to initialize. 
- break; - case LangOptions::TrivialAutoVarInitKind::Zero: - case LangOptions::TrivialAutoVarInitKind::Pattern: - cgm.errorNYI("trivial auto var init"); - break; - } - } - - // An alloca will always return a pointer to the alloca (stack) address - // space. This address space need not be the same as the AST / Language - // default (e.g. in C / C++ auto vars are in the generic address space). At - // the AST level this is handled within CreateTempAlloca et al., but for the - // builtin / dynamic alloca we have to handle it here. - assert(!cir::MissingFeatures::addressSpace()); - - // Bitcast the alloca to the expected type. - return RValue::get( - builder.createBitcast(allocaAddr, builder.getVoidPtrTy())); + case Builtin::BI__builtin_va_copy: { + mlir::Value dstPtr = emitVAListRef(e->getArg(0)).getPointer(); + mlir::Value srcPtr = emitVAListRef(e->getArg(1)).getPointer(); + cir::VACopyOp::create(builder, dstPtr.getLoc(), dstPtr, srcPtr); + return {}; } - case Builtin::BIcos: case Builtin::BIcosf: case Builtin::BIcosl: @@ -211,6 +310,39 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, assert(!cir::MissingFeatures::fastMathFlags()); return emitUnaryMaybeConstrainedFPBuiltin<cir::CosOp>(*this, *e); + case Builtin::BIceil: + case Builtin::BIceilf: + case Builtin::BIceill: + case Builtin::BI__builtin_ceil: + case Builtin::BI__builtin_ceilf: + case Builtin::BI__builtin_ceilf16: + case Builtin::BI__builtin_ceill: + case Builtin::BI__builtin_ceilf128: + assert(!cir::MissingFeatures::fastMathFlags()); + return emitUnaryMaybeConstrainedFPBuiltin<cir::CeilOp>(*this, *e); + + case Builtin::BIexp: + case Builtin::BIexpf: + case Builtin::BIexpl: + case Builtin::BI__builtin_exp: + case Builtin::BI__builtin_expf: + case Builtin::BI__builtin_expf16: + case Builtin::BI__builtin_expl: + case Builtin::BI__builtin_expf128: + assert(!cir::MissingFeatures::fastMathFlags()); + return emitUnaryMaybeConstrainedFPBuiltin<cir::ExpOp>(*this, *e); + + case Builtin::BIexp2: + case Builtin::BIexp2f: + case Builtin::BIexp2l: + case Builtin::BI__builtin_exp2: + case Builtin::BI__builtin_exp2f: + case Builtin::BI__builtin_exp2f16: + case Builtin::BI__builtin_exp2l: + case Builtin::BI__builtin_exp2f128: + assert(!cir::MissingFeatures::fastMathFlags()); + return emitUnaryMaybeConstrainedFPBuiltin<cir::Exp2Op>(*this, *e); + case Builtin::BIfabs: case Builtin::BIfabsf: case Builtin::BIfabsl: @@ -221,6 +353,16 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, case Builtin::BI__builtin_fabsf128: return emitUnaryMaybeConstrainedFPBuiltin<cir::FAbsOp>(*this, *e); + case Builtin::BIfloor: + case Builtin::BIfloorf: + case Builtin::BIfloorl: + case Builtin::BI__builtin_floor: + case Builtin::BI__builtin_floorf: + case Builtin::BI__builtin_floorf16: + case Builtin::BI__builtin_floorl: + case Builtin::BI__builtin_floorf128: + return emitUnaryMaybeConstrainedFPBuiltin<cir::FloorOp>(*this, *e); + case Builtin::BI__assume: case Builtin::BI__builtin_assume: { if (e->getArg(0)->HasSideEffects(getContext())) @@ -398,36 +540,6 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, case Builtin::BI__builtin_rotateright64: return emitRotate(e, /*isRotateLeft=*/false); - case Builtin::BI__builtin_return_address: - case Builtin::BI__builtin_frame_address: { - mlir::Location loc = getLoc(e->getExprLoc()); - llvm::APSInt level = e->getArg(0)->EvaluateKnownConstInt(getContext()); - if (builtinID == Builtin::BI__builtin_return_address) { - return 
RValue::get(cir::ReturnAddrOp::create( - builder, loc, - builder.getConstAPInt(loc, builder.getUInt32Ty(), level))); - } - return RValue::get(cir::FrameAddrOp::create( - builder, loc, - builder.getConstAPInt(loc, builder.getUInt32Ty(), level))); - } - - case Builtin::BI__builtin_trap: - emitTrap(loc, /*createNewBlock=*/true); - return RValue::get(nullptr); - - case Builtin::BI__builtin_unreachable: - emitUnreachable(e->getExprLoc(), /*createNewBlock=*/true); - return RValue::get(nullptr); - - case Builtin::BI__builtin_elementwise_acos: - return emitUnaryFPBuiltin<cir::ACosOp>(*this, *e); - case Builtin::BI__builtin_elementwise_asin: - return emitUnaryFPBuiltin<cir::ASinOp>(*this, *e); - case Builtin::BI__builtin_elementwise_atan: - return emitUnaryFPBuiltin<cir::ATanOp>(*this, *e); - case Builtin::BI__builtin_elementwise_cos: - return emitUnaryFPBuiltin<cir::CosOp>(*this, *e); case Builtin::BI__builtin_coro_id: case Builtin::BI__builtin_coro_promise: case Builtin::BI__builtin_coro_resume: @@ -443,9 +555,7 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, return getUndefRValue(e->getType()); case Builtin::BI__builtin_coro_frame: { - cgm.errorNYI(e->getSourceRange(), "BI__builtin_coro_frame NYI"); - assert(!cir::MissingFeatures::coroutineFrame()); - return getUndefRValue(e->getType()); + return emitCoroutineFrame(); } case Builtin::BI__builtin_coro_free: case Builtin::BI__builtin_coro_size: { @@ -459,6 +569,58 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, return emitCall(e->getCallee()->getType(), CIRGenCallee::forDirect(fnOp), e, returnValue); } + + case Builtin::BI__builtin_constant_p: { + mlir::Type resultType = convertType(e->getType()); + + const Expr *arg = e->getArg(0); + QualType argType = arg->getType(); + // FIXME: The allowance for Obj-C pointers and block pointers is historical + // and likely a mistake. + if (!argType->isIntegralOrEnumerationType() && !argType->isFloatingType() && + !argType->isObjCObjectPointerType() && !argType->isBlockPointerType()) { + // Per the GCC documentation, only numeric constants are recognized after + // inlining. + return RValue::get( + builder.getConstInt(getLoc(e->getSourceRange()), + mlir::cast<cir::IntType>(resultType), 0)); + } + + if (arg->HasSideEffects(getContext())) { + // The argument is unevaluated, so be conservative if it might have + // side-effects. + return RValue::get( + builder.getConstInt(getLoc(e->getSourceRange()), + mlir::cast<cir::IntType>(resultType), 0)); + } + + mlir::Value argValue = emitScalarExpr(arg); + if (argType->isObjCObjectPointerType()) { + cgm.errorNYI(e->getSourceRange(), + "__builtin_constant_p: Obj-C object pointer"); + return {}; + } + argValue = builder.createBitcast(argValue, convertType(argType)); + + mlir::Value result = cir::IsConstantOp::create( + builder, getLoc(e->getSourceRange()), argValue); + // IsConstantOp returns a bool, but __builtin_constant_p returns an int. + result = builder.createBoolToInt(result, resultType); + return RValue::get(result); + } + case Builtin::BI__builtin_dynamic_object_size: + case Builtin::BI__builtin_object_size: { + unsigned type = + e->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue(); + auto resType = mlir::cast<cir::IntType>(convertType(e->getType())); + + // We pass this builtin onto the optimizer so that it can figure out the + // object size in more complex cases. 
+    bool isDynamic = builtinID == Builtin::BI__builtin_dynamic_object_size;
+    return RValue::get(emitBuiltinObjectSize(e->getArg(0), type, resType,
+                                             /*EmittedE=*/nullptr, isDynamic));
+  }
+
   case Builtin::BI__builtin_prefetch: {
     auto evaluateOperandAsInt = [&](const Expr *arg) {
       Expr::EvalResult res;
@@ -480,6 +642,696 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
     cir::PrefetchOp::create(builder, loc, address, locality, isWrite);
     return RValue::get(nullptr);
   }
+  case Builtin::BI__builtin_readcyclecounter:
+  case Builtin::BI__builtin_readsteadycounter:
+  case Builtin::BI__builtin___clear_cache:
+    return errorBuiltinNYI(*this, e, builtinID);
+  case Builtin::BI__builtin_trap:
+    emitTrap(loc, /*createNewBlock=*/true);
+    return RValue::getIgnored();
+  case Builtin::BI__builtin_verbose_trap:
+  case Builtin::BI__debugbreak:
+    return errorBuiltinNYI(*this, e, builtinID);
+  case Builtin::BI__builtin_unreachable:
+    emitUnreachable(e->getExprLoc(), /*createNewBlock=*/true);
+    return RValue::getIgnored();
+  case Builtin::BI__builtin_powi:
+  case Builtin::BI__builtin_powif:
+  case Builtin::BI__builtin_powil:
+  case Builtin::BI__builtin_frexpl:
+  case Builtin::BI__builtin_frexp:
+  case Builtin::BI__builtin_frexpf:
+  case Builtin::BI__builtin_frexpf128:
+  case Builtin::BI__builtin_frexpf16:
+  case Builtin::BImodf:
+  case Builtin::BImodff:
+  case Builtin::BImodfl:
+  case Builtin::BI__builtin_modf:
+  case Builtin::BI__builtin_modff:
+  case Builtin::BI__builtin_modfl:
+  case Builtin::BI__builtin_isgreater:
+  case Builtin::BI__builtin_isgreaterequal:
+  case Builtin::BI__builtin_isless:
+  case Builtin::BI__builtin_islessequal:
+  case Builtin::BI__builtin_islessgreater:
+  case Builtin::BI__builtin_isunordered:
+    return errorBuiltinNYI(*this, e, builtinID);
+  // From https://clang.llvm.org/docs/LanguageExtensions.html#builtin-isfpclass
+  //
+  // The `__builtin_isfpclass()` builtin is a generalization of functions
+  // isnan, isinf, isfinite and some others defined by the C standard. It tests
+  // if the floating-point value, specified by the first argument, falls into
+  // any of data classes, specified by the second argument.
+ case Builtin::BI__builtin_isnan: { + assert(!cir::MissingFeatures::cgFPOptionsRAII()); + mlir::Value v = emitScalarExpr(e->getArg(0)); + assert(!cir::MissingFeatures::fpConstraints()); + mlir::Location loc = getLoc(e->getBeginLoc()); + return RValue::get(builder.createBoolToInt( + builder.createIsFPClass(loc, v, cir::FPClassTest::Nan), + convertType(e->getType()))); + } + + case Builtin::BI__builtin_issignaling: { + assert(!cir::MissingFeatures::cgFPOptionsRAII()); + mlir::Value v = emitScalarExpr(e->getArg(0)); + mlir::Location loc = getLoc(e->getBeginLoc()); + return RValue::get(builder.createBoolToInt( + builder.createIsFPClass(loc, v, cir::FPClassTest::SignalingNaN), + convertType(e->getType()))); + } + + case Builtin::BI__builtin_isinf: { + assert(!cir::MissingFeatures::cgFPOptionsRAII()); + mlir::Value v = emitScalarExpr(e->getArg(0)); + assert(!cir::MissingFeatures::fpConstraints()); + mlir::Location loc = getLoc(e->getBeginLoc()); + return RValue::get(builder.createBoolToInt( + builder.createIsFPClass(loc, v, cir::FPClassTest::Infinity), + convertType(e->getType()))); + } + case Builtin::BIfinite: + case Builtin::BI__finite: + case Builtin::BIfinitef: + case Builtin::BI__finitef: + case Builtin::BIfinitel: + case Builtin::BI__finitel: + case Builtin::BI__builtin_isfinite: { + assert(!cir::MissingFeatures::cgFPOptionsRAII()); + mlir::Value v = emitScalarExpr(e->getArg(0)); + assert(!cir::MissingFeatures::fpConstraints()); + mlir::Location loc = getLoc(e->getBeginLoc()); + return RValue::get(builder.createBoolToInt( + builder.createIsFPClass(loc, v, cir::FPClassTest::Finite), + convertType(e->getType()))); + } + + case Builtin::BI__builtin_isnormal: { + assert(!cir::MissingFeatures::cgFPOptionsRAII()); + mlir::Value v = emitScalarExpr(e->getArg(0)); + mlir::Location loc = getLoc(e->getBeginLoc()); + return RValue::get(builder.createBoolToInt( + builder.createIsFPClass(loc, v, cir::FPClassTest::Normal), + convertType(e->getType()))); + } + + case Builtin::BI__builtin_issubnormal: { + assert(!cir::MissingFeatures::cgFPOptionsRAII()); + mlir::Value v = emitScalarExpr(e->getArg(0)); + mlir::Location loc = getLoc(e->getBeginLoc()); + return RValue::get(builder.createBoolToInt( + builder.createIsFPClass(loc, v, cir::FPClassTest::Subnormal), + convertType(e->getType()))); + } + + case Builtin::BI__builtin_iszero: { + assert(!cir::MissingFeatures::cgFPOptionsRAII()); + mlir::Value v = emitScalarExpr(e->getArg(0)); + mlir::Location loc = getLoc(e->getBeginLoc()); + return RValue::get(builder.createBoolToInt( + builder.createIsFPClass(loc, v, cir::FPClassTest::Zero), + convertType(e->getType()))); + } + case Builtin::BI__builtin_isfpclass: { + Expr::EvalResult result; + if (!e->getArg(1)->EvaluateAsInt(result, cgm.getASTContext())) + break; + + assert(!cir::MissingFeatures::cgFPOptionsRAII()); + mlir::Value v = emitScalarExpr(e->getArg(0)); + uint64_t test = result.Val.getInt().getLimitedValue(); + mlir::Location loc = getLoc(e->getBeginLoc()); + // + return RValue::get(builder.createBoolToInt( + builder.createIsFPClass(loc, v, cir::FPClassTest(test)), + convertType(e->getType()))); + } + case Builtin::BI__builtin_nondeterministic_value: + case Builtin::BI__builtin_elementwise_abs: + return errorBuiltinNYI(*this, e, builtinID); + case Builtin::BI__builtin_elementwise_acos: + return emitUnaryFPBuiltin<cir::ACosOp>(*this, *e); + case Builtin::BI__builtin_elementwise_asin: + return emitUnaryFPBuiltin<cir::ASinOp>(*this, *e); + case Builtin::BI__builtin_elementwise_atan: + return 
emitUnaryFPBuiltin<cir::ATanOp>(*this, *e); + case Builtin::BI__builtin_elementwise_atan2: + case Builtin::BI__builtin_elementwise_ceil: + case Builtin::BI__builtin_elementwise_exp: + case Builtin::BI__builtin_elementwise_exp2: + case Builtin::BI__builtin_elementwise_exp10: + case Builtin::BI__builtin_elementwise_ldexp: + case Builtin::BI__builtin_elementwise_log: + case Builtin::BI__builtin_elementwise_log2: + case Builtin::BI__builtin_elementwise_log10: + case Builtin::BI__builtin_elementwise_pow: + case Builtin::BI__builtin_elementwise_bitreverse: + return errorBuiltinNYI(*this, e, builtinID); + case Builtin::BI__builtin_elementwise_cos: + return emitUnaryFPBuiltin<cir::CosOp>(*this, *e); + case Builtin::BI__builtin_elementwise_cosh: + case Builtin::BI__builtin_elementwise_floor: + case Builtin::BI__builtin_elementwise_popcount: + case Builtin::BI__builtin_elementwise_roundeven: + case Builtin::BI__builtin_elementwise_round: + case Builtin::BI__builtin_elementwise_rint: + case Builtin::BI__builtin_elementwise_nearbyint: + case Builtin::BI__builtin_elementwise_sin: + case Builtin::BI__builtin_elementwise_sinh: + case Builtin::BI__builtin_elementwise_tan: + case Builtin::BI__builtin_elementwise_tanh: + case Builtin::BI__builtin_elementwise_trunc: + case Builtin::BI__builtin_elementwise_canonicalize: + case Builtin::BI__builtin_elementwise_copysign: + case Builtin::BI__builtin_elementwise_fma: + case Builtin::BI__builtin_elementwise_fshl: + case Builtin::BI__builtin_elementwise_fshr: + case Builtin::BI__builtin_elementwise_add_sat: + case Builtin::BI__builtin_elementwise_sub_sat: + case Builtin::BI__builtin_elementwise_max: + case Builtin::BI__builtin_elementwise_min: + case Builtin::BI__builtin_elementwise_maxnum: + case Builtin::BI__builtin_elementwise_minnum: + case Builtin::BI__builtin_elementwise_maximum: + case Builtin::BI__builtin_elementwise_minimum: + case Builtin::BI__builtin_elementwise_maximumnum: + case Builtin::BI__builtin_elementwise_minimumnum: + case Builtin::BI__builtin_reduce_max: + case Builtin::BI__builtin_reduce_min: + case Builtin::BI__builtin_reduce_add: + case Builtin::BI__builtin_reduce_mul: + case Builtin::BI__builtin_reduce_xor: + case Builtin::BI__builtin_reduce_or: + case Builtin::BI__builtin_reduce_and: + case Builtin::BI__builtin_reduce_maximum: + case Builtin::BI__builtin_reduce_minimum: + case Builtin::BI__builtin_matrix_transpose: + case Builtin::BI__builtin_matrix_column_major_load: + case Builtin::BI__builtin_matrix_column_major_store: + case Builtin::BI__builtin_masked_load: + case Builtin::BI__builtin_masked_expand_load: + case Builtin::BI__builtin_masked_gather: + case Builtin::BI__builtin_masked_store: + case Builtin::BI__builtin_masked_compress_store: + case Builtin::BI__builtin_masked_scatter: + case Builtin::BI__builtin_isinf_sign: + case Builtin::BI__builtin_flt_rounds: + case Builtin::BI__builtin_set_flt_rounds: + case Builtin::BI__builtin_fpclassify: + return errorBuiltinNYI(*this, e, builtinID); + case Builtin::BIalloca: + case Builtin::BI_alloca: + case Builtin::BI__builtin_alloca_uninitialized: + case Builtin::BI__builtin_alloca: + return emitBuiltinAlloca(*this, e, builtinID); + case Builtin::BI__builtin_alloca_with_align_uninitialized: + case Builtin::BI__builtin_alloca_with_align: + case Builtin::BI__builtin_infer_alloc_token: + case Builtin::BIbzero: + case Builtin::BI__builtin_bzero: + case Builtin::BIbcopy: + case Builtin::BI__builtin_bcopy: + return errorBuiltinNYI(*this, e, builtinID); + case Builtin::BImemcpy: + case 
Builtin::BI__builtin_memcpy: + case Builtin::BImempcpy: + case Builtin::BI__builtin_mempcpy: + case Builtin::BI__builtin_memcpy_inline: + case Builtin::BI__builtin_char_memchr: + case Builtin::BI__builtin___memcpy_chk: + case Builtin::BI__builtin_objc_memmove_collectable: + case Builtin::BI__builtin___memmove_chk: + case Builtin::BI__builtin_trivially_relocate: + case Builtin::BImemmove: + case Builtin::BI__builtin_memmove: + case Builtin::BImemset: + case Builtin::BI__builtin_memset: + case Builtin::BI__builtin_memset_inline: + case Builtin::BI__builtin___memset_chk: + case Builtin::BI__builtin_wmemchr: + case Builtin::BI__builtin_wmemcmp: + break; // Handled as library calls below. + case Builtin::BI__builtin_dwarf_cfa: + return errorBuiltinNYI(*this, e, builtinID); + case Builtin::BI__builtin_return_address: + case Builtin::BI_ReturnAddress: + case Builtin::BI__builtin_frame_address: { + mlir::Location loc = getLoc(e->getExprLoc()); + llvm::APSInt level = e->getArg(0)->EvaluateKnownConstInt(getContext()); + if (builtinID == Builtin::BI__builtin_return_address) { + return RValue::get(cir::ReturnAddrOp::create( + builder, loc, + builder.getConstAPInt(loc, builder.getUInt32Ty(), level))); + } + return RValue::get(cir::FrameAddrOp::create( + builder, loc, + builder.getConstAPInt(loc, builder.getUInt32Ty(), level))); + } + case Builtin::BI__builtin_extract_return_addr: + case Builtin::BI__builtin_frob_return_addr: + case Builtin::BI__builtin_dwarf_sp_column: + case Builtin::BI__builtin_init_dwarf_reg_size_table: + case Builtin::BI__builtin_eh_return: + case Builtin::BI__builtin_unwind_init: + case Builtin::BI__builtin_extend_pointer: + case Builtin::BI__builtin_setjmp: + case Builtin::BI__builtin_longjmp: + case Builtin::BI__builtin_launder: + case Builtin::BI__sync_fetch_and_add: + case Builtin::BI__sync_fetch_and_sub: + case Builtin::BI__sync_fetch_and_or: + case Builtin::BI__sync_fetch_and_and: + case Builtin::BI__sync_fetch_and_xor: + case Builtin::BI__sync_fetch_and_nand: + case Builtin::BI__sync_add_and_fetch: + case Builtin::BI__sync_sub_and_fetch: + case Builtin::BI__sync_and_and_fetch: + case Builtin::BI__sync_or_and_fetch: + case Builtin::BI__sync_xor_and_fetch: + case Builtin::BI__sync_nand_and_fetch: + case Builtin::BI__sync_val_compare_and_swap: + case Builtin::BI__sync_bool_compare_and_swap: + case Builtin::BI__sync_lock_test_and_set: + case Builtin::BI__sync_lock_release: + case Builtin::BI__sync_swap: + case Builtin::BI__sync_fetch_and_add_1: + case Builtin::BI__sync_fetch_and_add_2: + case Builtin::BI__sync_fetch_and_add_4: + case Builtin::BI__sync_fetch_and_add_8: + case Builtin::BI__sync_fetch_and_add_16: + case Builtin::BI__sync_fetch_and_sub_1: + case Builtin::BI__sync_fetch_and_sub_2: + case Builtin::BI__sync_fetch_and_sub_4: + case Builtin::BI__sync_fetch_and_sub_8: + case Builtin::BI__sync_fetch_and_sub_16: + case Builtin::BI__sync_fetch_and_or_1: + case Builtin::BI__sync_fetch_and_or_2: + case Builtin::BI__sync_fetch_and_or_4: + case Builtin::BI__sync_fetch_and_or_8: + case Builtin::BI__sync_fetch_and_or_16: + case Builtin::BI__sync_fetch_and_and_1: + case Builtin::BI__sync_fetch_and_and_2: + case Builtin::BI__sync_fetch_and_and_4: + case Builtin::BI__sync_fetch_and_and_8: + case Builtin::BI__sync_fetch_and_and_16: + case Builtin::BI__sync_fetch_and_xor_1: + case Builtin::BI__sync_fetch_and_xor_2: + case Builtin::BI__sync_fetch_and_xor_4: + case Builtin::BI__sync_fetch_and_xor_8: + case Builtin::BI__sync_fetch_and_xor_16: + case Builtin::BI__sync_fetch_and_nand_1: + 
case Builtin::BI__sync_fetch_and_nand_2: + case Builtin::BI__sync_fetch_and_nand_4: + case Builtin::BI__sync_fetch_and_nand_8: + case Builtin::BI__sync_fetch_and_nand_16: + case Builtin::BI__sync_fetch_and_min: + case Builtin::BI__sync_fetch_and_max: + case Builtin::BI__sync_fetch_and_umin: + case Builtin::BI__sync_fetch_and_umax: + case Builtin::BI__sync_add_and_fetch_1: + case Builtin::BI__sync_add_and_fetch_2: + case Builtin::BI__sync_add_and_fetch_4: + case Builtin::BI__sync_add_and_fetch_8: + case Builtin::BI__sync_add_and_fetch_16: + case Builtin::BI__sync_sub_and_fetch_1: + case Builtin::BI__sync_sub_and_fetch_2: + case Builtin::BI__sync_sub_and_fetch_4: + case Builtin::BI__sync_sub_and_fetch_8: + case Builtin::BI__sync_sub_and_fetch_16: + case Builtin::BI__sync_and_and_fetch_1: + case Builtin::BI__sync_and_and_fetch_2: + case Builtin::BI__sync_and_and_fetch_4: + case Builtin::BI__sync_and_and_fetch_8: + case Builtin::BI__sync_and_and_fetch_16: + case Builtin::BI__sync_or_and_fetch_1: + case Builtin::BI__sync_or_and_fetch_2: + case Builtin::BI__sync_or_and_fetch_4: + case Builtin::BI__sync_or_and_fetch_8: + case Builtin::BI__sync_or_and_fetch_16: + case Builtin::BI__sync_xor_and_fetch_1: + case Builtin::BI__sync_xor_and_fetch_2: + case Builtin::BI__sync_xor_and_fetch_4: + case Builtin::BI__sync_xor_and_fetch_8: + case Builtin::BI__sync_xor_and_fetch_16: + case Builtin::BI__sync_nand_and_fetch_1: + case Builtin::BI__sync_nand_and_fetch_2: + case Builtin::BI__sync_nand_and_fetch_4: + case Builtin::BI__sync_nand_and_fetch_8: + case Builtin::BI__sync_nand_and_fetch_16: + case Builtin::BI__sync_val_compare_and_swap_1: + case Builtin::BI__sync_val_compare_and_swap_2: + case Builtin::BI__sync_val_compare_and_swap_4: + case Builtin::BI__sync_val_compare_and_swap_8: + case Builtin::BI__sync_val_compare_and_swap_16: + case Builtin::BI__sync_bool_compare_and_swap_1: + case Builtin::BI__sync_bool_compare_and_swap_2: + case Builtin::BI__sync_bool_compare_and_swap_4: + case Builtin::BI__sync_bool_compare_and_swap_8: + case Builtin::BI__sync_bool_compare_and_swap_16: + case Builtin::BI__sync_swap_1: + case Builtin::BI__sync_swap_2: + case Builtin::BI__sync_swap_4: + case Builtin::BI__sync_swap_8: + case Builtin::BI__sync_swap_16: + case Builtin::BI__sync_lock_test_and_set_1: + case Builtin::BI__sync_lock_test_and_set_2: + case Builtin::BI__sync_lock_test_and_set_4: + case Builtin::BI__sync_lock_test_and_set_8: + case Builtin::BI__sync_lock_test_and_set_16: + case Builtin::BI__sync_lock_release_1: + case Builtin::BI__sync_lock_release_2: + case Builtin::BI__sync_lock_release_4: + case Builtin::BI__sync_lock_release_8: + case Builtin::BI__sync_lock_release_16: + case Builtin::BI__sync_synchronize: + case Builtin::BI__builtin_nontemporal_load: + case Builtin::BI__builtin_nontemporal_store: + case Builtin::BI__c11_atomic_is_lock_free: + case Builtin::BI__atomic_is_lock_free: + case Builtin::BI__atomic_test_and_set: + case Builtin::BI__atomic_clear: + return errorBuiltinNYI(*this, e, builtinID); + case Builtin::BI__atomic_thread_fence: + return RValue::get( + makeAtomicFenceValue(*this, e, cir::SyncScopeKind::System)); + case Builtin::BI__atomic_signal_fence: + return RValue::get( + makeAtomicFenceValue(*this, e, cir::SyncScopeKind::SingleThread)); + case Builtin::BI__c11_atomic_thread_fence: + case Builtin::BI__c11_atomic_signal_fence: + case Builtin::BI__scoped_atomic_thread_fence: + case Builtin::BI__builtin_signbit: + case Builtin::BI__builtin_signbitf: + case Builtin::BI__builtin_signbitl: + case 
Builtin::BI__warn_memset_zero_len: + case Builtin::BI__annotation: + case Builtin::BI__builtin_annotation: + case Builtin::BI__builtin_addcb: + case Builtin::BI__builtin_addcs: + case Builtin::BI__builtin_addc: + case Builtin::BI__builtin_addcl: + case Builtin::BI__builtin_addcll: + case Builtin::BI__builtin_subcb: + case Builtin::BI__builtin_subcs: + case Builtin::BI__builtin_subc: + case Builtin::BI__builtin_subcl: + case Builtin::BI__builtin_subcll: + return errorBuiltinNYI(*this, e, builtinID); + + case Builtin::BI__builtin_add_overflow: + case Builtin::BI__builtin_sub_overflow: + case Builtin::BI__builtin_mul_overflow: { + const clang::Expr *leftArg = e->getArg(0); + const clang::Expr *rightArg = e->getArg(1); + const clang::Expr *resultArg = e->getArg(2); + + clang::QualType resultQTy = + resultArg->getType()->castAs<clang::PointerType>()->getPointeeType(); + + WidthAndSignedness leftInfo = + getIntegerWidthAndSignedness(cgm.getASTContext(), leftArg->getType()); + WidthAndSignedness rightInfo = + getIntegerWidthAndSignedness(cgm.getASTContext(), rightArg->getType()); + WidthAndSignedness resultInfo = + getIntegerWidthAndSignedness(cgm.getASTContext(), resultQTy); + + // Note we compute the encompassing type with the consideration to the + // result type, so later in LLVM lowering we don't get redundant integral + // extension casts. + WidthAndSignedness encompassingInfo = + EncompassingIntegerType({leftInfo, rightInfo, resultInfo}); + + auto encompassingCIRTy = cir::IntType::get( + &getMLIRContext(), encompassingInfo.width, encompassingInfo.isSigned); + auto resultCIRTy = mlir::cast<cir::IntType>(cgm.convertType(resultQTy)); + + mlir::Value left = emitScalarExpr(leftArg); + mlir::Value right = emitScalarExpr(rightArg); + Address resultPtr = emitPointerWithAlignment(resultArg); + + // Extend each operand to the encompassing type, if necessary. + if (left.getType() != encompassingCIRTy) + left = + builder.createCast(cir::CastKind::integral, left, encompassingCIRTy); + if (right.getType() != encompassingCIRTy) + right = + builder.createCast(cir::CastKind::integral, right, encompassingCIRTy); + + // Perform the operation on the extended values. + cir::BinOpOverflowKind opKind; + switch (builtinID) { + default: + llvm_unreachable("Unknown overflow builtin id."); + case Builtin::BI__builtin_add_overflow: + opKind = cir::BinOpOverflowKind::Add; + break; + case Builtin::BI__builtin_sub_overflow: + opKind = cir::BinOpOverflowKind::Sub; + break; + case Builtin::BI__builtin_mul_overflow: + opKind = cir::BinOpOverflowKind::Mul; + break; + } + + mlir::Location loc = getLoc(e->getSourceRange()); + auto arithOp = cir::BinOpOverflowOp::create(builder, loc, resultCIRTy, + opKind, left, right); + + // Here is a slight difference from the original clang CodeGen: + // - In the original clang CodeGen, the checked arithmetic result is + // first computed as a value of the encompassing type, and then it is + // truncated to the actual result type with a second overflow checking. + // - In CIRGen, the checked arithmetic operation directly produce the + // checked arithmetic result in its expected type. + // + // So we don't need a truncation and a second overflow checking here. + + // Finally, store the result using the pointer. 
+ bool isVolatile = + resultArg->getType()->getPointeeType().isVolatileQualified(); + builder.createStore(loc, emitToMemory(arithOp.getResult(), resultQTy), + resultPtr, isVolatile); + + return RValue::get(arithOp.getOverflow()); + } + + case Builtin::BI__builtin_uadd_overflow: + case Builtin::BI__builtin_uaddl_overflow: + case Builtin::BI__builtin_uaddll_overflow: + case Builtin::BI__builtin_usub_overflow: + case Builtin::BI__builtin_usubl_overflow: + case Builtin::BI__builtin_usubll_overflow: + case Builtin::BI__builtin_umul_overflow: + case Builtin::BI__builtin_umull_overflow: + case Builtin::BI__builtin_umulll_overflow: + case Builtin::BI__builtin_sadd_overflow: + case Builtin::BI__builtin_saddl_overflow: + case Builtin::BI__builtin_saddll_overflow: + case Builtin::BI__builtin_ssub_overflow: + case Builtin::BI__builtin_ssubl_overflow: + case Builtin::BI__builtin_ssubll_overflow: + case Builtin::BI__builtin_smul_overflow: + case Builtin::BI__builtin_smull_overflow: + case Builtin::BI__builtin_smulll_overflow: { + // Scalarize our inputs. + mlir::Value x = emitScalarExpr(e->getArg(0)); + mlir::Value y = emitScalarExpr(e->getArg(1)); + + const clang::Expr *resultArg = e->getArg(2); + Address resultPtr = emitPointerWithAlignment(resultArg); + + // Decide which of the arithmetic operation we are lowering to: + cir::BinOpOverflowKind arithKind; + switch (builtinID) { + default: + llvm_unreachable("Unknown overflow builtin id."); + case Builtin::BI__builtin_uadd_overflow: + case Builtin::BI__builtin_uaddl_overflow: + case Builtin::BI__builtin_uaddll_overflow: + case Builtin::BI__builtin_sadd_overflow: + case Builtin::BI__builtin_saddl_overflow: + case Builtin::BI__builtin_saddll_overflow: + arithKind = cir::BinOpOverflowKind::Add; + break; + case Builtin::BI__builtin_usub_overflow: + case Builtin::BI__builtin_usubl_overflow: + case Builtin::BI__builtin_usubll_overflow: + case Builtin::BI__builtin_ssub_overflow: + case Builtin::BI__builtin_ssubl_overflow: + case Builtin::BI__builtin_ssubll_overflow: + arithKind = cir::BinOpOverflowKind::Sub; + break; + case Builtin::BI__builtin_umul_overflow: + case Builtin::BI__builtin_umull_overflow: + case Builtin::BI__builtin_umulll_overflow: + case Builtin::BI__builtin_smul_overflow: + case Builtin::BI__builtin_smull_overflow: + case Builtin::BI__builtin_smulll_overflow: + arithKind = cir::BinOpOverflowKind::Mul; + break; + } + + clang::QualType resultQTy = + resultArg->getType()->castAs<clang::PointerType>()->getPointeeType(); + auto resultCIRTy = mlir::cast<cir::IntType>(cgm.convertType(resultQTy)); + + mlir::Location loc = getLoc(e->getSourceRange()); + cir::BinOpOverflowOp arithOp = cir::BinOpOverflowOp::create( + builder, loc, resultCIRTy, arithKind, x, y); + + bool isVolatile = + resultArg->getType()->getPointeeType().isVolatileQualified(); + builder.createStore(loc, emitToMemory(arithOp.getResult(), resultQTy), + resultPtr, isVolatile); + + return RValue::get(arithOp.getOverflow()); + } + + case Builtin::BIaddressof: + case Builtin::BI__addressof: + case Builtin::BI__builtin_addressof: + case Builtin::BI__builtin_function_start: + return errorBuiltinNYI(*this, e, builtinID); + case Builtin::BI__builtin_operator_new: + return emitNewOrDeleteBuiltinCall( + e->getCallee()->getType()->castAs<FunctionProtoType>(), e, OO_New); + case Builtin::BI__builtin_operator_delete: + emitNewOrDeleteBuiltinCall( + e->getCallee()->getType()->castAs<FunctionProtoType>(), e, OO_Delete); + return RValue::get(nullptr); + case Builtin::BI__builtin_is_aligned: + case 
Builtin::BI__builtin_align_up: + case Builtin::BI__builtin_align_down: + case Builtin::BI__noop: + case Builtin::BI__builtin_call_with_static_chain: + case Builtin::BI_InterlockedExchange8: + case Builtin::BI_InterlockedExchange16: + case Builtin::BI_InterlockedExchange: + case Builtin::BI_InterlockedExchangePointer: + case Builtin::BI_InterlockedCompareExchangePointer: + case Builtin::BI_InterlockedCompareExchangePointer_nf: + case Builtin::BI_InterlockedCompareExchange8: + case Builtin::BI_InterlockedCompareExchange16: + case Builtin::BI_InterlockedCompareExchange: + case Builtin::BI_InterlockedCompareExchange64: + case Builtin::BI_InterlockedIncrement16: + case Builtin::BI_InterlockedIncrement: + case Builtin::BI_InterlockedDecrement16: + case Builtin::BI_InterlockedDecrement: + case Builtin::BI_InterlockedAnd8: + case Builtin::BI_InterlockedAnd16: + case Builtin::BI_InterlockedAnd: + case Builtin::BI_InterlockedExchangeAdd8: + case Builtin::BI_InterlockedExchangeAdd16: + case Builtin::BI_InterlockedExchangeAdd: + case Builtin::BI_InterlockedExchangeSub8: + case Builtin::BI_InterlockedExchangeSub16: + case Builtin::BI_InterlockedExchangeSub: + case Builtin::BI_InterlockedOr8: + case Builtin::BI_InterlockedOr16: + case Builtin::BI_InterlockedOr: + case Builtin::BI_InterlockedXor8: + case Builtin::BI_InterlockedXor16: + case Builtin::BI_InterlockedXor: + case Builtin::BI_bittest64: + case Builtin::BI_bittest: + case Builtin::BI_bittestandcomplement64: + case Builtin::BI_bittestandcomplement: + case Builtin::BI_bittestandreset64: + case Builtin::BI_bittestandreset: + case Builtin::BI_bittestandset64: + case Builtin::BI_bittestandset: + case Builtin::BI_interlockedbittestandreset: + case Builtin::BI_interlockedbittestandreset64: + case Builtin::BI_interlockedbittestandreset64_acq: + case Builtin::BI_interlockedbittestandreset64_rel: + case Builtin::BI_interlockedbittestandreset64_nf: + case Builtin::BI_interlockedbittestandset64: + case Builtin::BI_interlockedbittestandset64_acq: + case Builtin::BI_interlockedbittestandset64_rel: + case Builtin::BI_interlockedbittestandset64_nf: + case Builtin::BI_interlockedbittestandset: + case Builtin::BI_interlockedbittestandset_acq: + case Builtin::BI_interlockedbittestandset_rel: + case Builtin::BI_interlockedbittestandset_nf: + case Builtin::BI_interlockedbittestandreset_acq: + case Builtin::BI_interlockedbittestandreset_rel: + case Builtin::BI_interlockedbittestandreset_nf: + case Builtin::BI__iso_volatile_load8: + case Builtin::BI__iso_volatile_load16: + case Builtin::BI__iso_volatile_load32: + case Builtin::BI__iso_volatile_load64: + case Builtin::BI__iso_volatile_store8: + case Builtin::BI__iso_volatile_store16: + case Builtin::BI__iso_volatile_store32: + case Builtin::BI__iso_volatile_store64: + case Builtin::BI__builtin_ptrauth_sign_constant: + case Builtin::BI__builtin_ptrauth_auth: + case Builtin::BI__builtin_ptrauth_auth_and_resign: + case Builtin::BI__builtin_ptrauth_blend_discriminator: + case Builtin::BI__builtin_ptrauth_sign_generic_data: + case Builtin::BI__builtin_ptrauth_sign_unauthenticated: + case Builtin::BI__builtin_ptrauth_strip: + case Builtin::BI__builtin_get_vtable_pointer: + case Builtin::BI__exception_code: + case Builtin::BI_exception_code: + case Builtin::BI__exception_info: + case Builtin::BI_exception_info: + case Builtin::BI__abnormal_termination: + case Builtin::BI_abnormal_termination: + case Builtin::BI_setjmpex: + case Builtin::BI_setjmp: + case Builtin::BImove: + case Builtin::BImove_if_noexcept: + case 
Builtin::BIforward: + case Builtin::BIforward_like: + case Builtin::BIas_const: + case Builtin::BI__GetExceptionInfo: + case Builtin::BI__fastfail: + case Builtin::BIread_pipe: + case Builtin::BIwrite_pipe: + case Builtin::BIreserve_read_pipe: + case Builtin::BIreserve_write_pipe: + case Builtin::BIwork_group_reserve_read_pipe: + case Builtin::BIwork_group_reserve_write_pipe: + case Builtin::BIsub_group_reserve_read_pipe: + case Builtin::BIsub_group_reserve_write_pipe: + case Builtin::BIcommit_read_pipe: + case Builtin::BIcommit_write_pipe: + case Builtin::BIwork_group_commit_read_pipe: + case Builtin::BIwork_group_commit_write_pipe: + case Builtin::BIsub_group_commit_read_pipe: + case Builtin::BIsub_group_commit_write_pipe: + case Builtin::BIget_pipe_num_packets: + case Builtin::BIget_pipe_max_packets: + case Builtin::BIto_global: + case Builtin::BIto_local: + case Builtin::BIto_private: + case Builtin::BIenqueue_kernel: + case Builtin::BIget_kernel_work_group_size: + case Builtin::BIget_kernel_preferred_work_group_size_multiple: + case Builtin::BIget_kernel_max_sub_group_size_for_ndrange: + case Builtin::BIget_kernel_sub_group_count_for_ndrange: + case Builtin::BI__builtin_store_half: + case Builtin::BI__builtin_store_halff: + case Builtin::BI__builtin_load_half: + case Builtin::BI__builtin_load_halff: + return errorBuiltinNYI(*this, e, builtinID); + case Builtin::BI__builtin_printf: + case Builtin::BIprintf: + break; + case Builtin::BI__builtin_canonicalize: + case Builtin::BI__builtin_canonicalizef: + case Builtin::BI__builtin_canonicalizef16: + case Builtin::BI__builtin_canonicalizel: + case Builtin::BI__builtin_thread_pointer: + case Builtin::BI__builtin_os_log_format: + case Builtin::BI__xray_customevent: + case Builtin::BI__xray_typedevent: + case Builtin::BI__builtin_ms_va_start: + case Builtin::BI__builtin_ms_va_end: + case Builtin::BI__builtin_ms_va_copy: + case Builtin::BI__builtin_get_device_side_mangled_name: + return errorBuiltinNYI(*this, e, builtinID); } // If this is an alias for a lib function (e.g. __builtin_sin), emit @@ -541,9 +1393,13 @@ static mlir::Value emitTargetArchBuiltinExpr(CIRGenFunction *cgf, case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: + // These are actually NYI, but that will be reported by emitBuiltinExpr. + // At this point, we don't even know that the builtin is target-specific. + return nullptr; case llvm::Triple::aarch64: case llvm::Triple::aarch64_32: case llvm::Triple::aarch64_be: + return cgf->emitAArch64BuiltinExpr(builtinID, e, returnValue, arch); case llvm::Triple::bpfeb: case llvm::Triple::bpfel: // These are actually NYI, but that will be reported by emitBuiltinExpr. @@ -590,6 +1446,22 @@ CIRGenFunction::emitTargetBuiltinExpr(unsigned builtinID, const CallExpr *e, getTarget().getTriple().getArch()); } +mlir::Value CIRGenFunction::emitScalarOrConstFoldImmArg( + const unsigned iceArguments, const unsigned idx, const Expr *argExpr) { + mlir::Value arg = {}; + if ((iceArguments & (1 << idx)) == 0) { + arg = emitScalarExpr(argExpr); + } else { + // If this is required to be a constant, constant fold it so that we + // know that the generated intrinsic gets a ConstantInt. + const std::optional<llvm::APSInt> result = + argExpr->getIntegerConstantExpr(getContext()); + assert(result && "Expected argument to be a constant"); + arg = builder.getConstInt(getLoc(argExpr->getSourceRange()), *result); + } + return arg; +} + /// Given a builtin id for a function like "__builtin_fabsf", return a Function* /// for "fabsf". 
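For reference, the checked-arithmetic builtins lowered earlier in this hunk (the __builtin_{s,u}add/sub/mul{,l,ll}_overflow family) follow the usual Clang/GCC contract: two scalar operands, a pointer to the result slot, and a boolean overflow flag as the return value, which is exactly the shape the BinOpOverflowOp lowering preserves. A minimal C/C++ usage sketch (standard builtins, not code from this patch):

#include <stdio.h>

int main(void) {
  int sum = 0;
  // Returns true when the signed addition wraps; the truncated result is
  // still written through the third argument.
  if (__builtin_sadd_overflow(2147483647, 1, &sum))
    printf("signed add overflowed, wrapped result = %d\n", sum);

  unsigned long long prod = 0;
  // 2^32 * 2^31 = 2^63 still fits in 64 unsigned bits, so no overflow.
  if (!__builtin_umulll_overflow(1ULL << 32, 1ULL << 31, &prod))
    printf("product = %llu\n", prod);
  return 0;
}

The overflow flag is what the lowering returns as the call's RValue, while the computed result is stored through the (possibly volatile) pointee type of the third argument.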
cir::FuncOp CIRGenModule::getBuiltinLibFunction(const FunctionDecl *fd, @@ -641,3 +1513,42 @@ mlir::Value CIRGenFunction::emitVAArg(VAArgExpr *ve) { mlir::Value vaList = emitVAListRef(ve->getSubExpr()).getPointer(); return cir::VAArgOp::create(builder, loc, type, vaList); } + +mlir::Value CIRGenFunction::emitBuiltinObjectSize(const Expr *e, unsigned type, + cir::IntType resType, + mlir::Value emittedE, + bool isDynamic) { + assert(!cir::MissingFeatures::opCallImplicitObjectSizeArgs()); + + // LLVM can't handle type=3 appropriately, and __builtin_object_size shouldn't + // evaluate e for side-effects. In either case, just like original LLVM + // lowering, we shouldn't lower to `cir.objsize` but to a constant instead. + if (type == 3 || (!emittedE && e->HasSideEffects(getContext()))) + return builder.getConstInt(getLoc(e->getSourceRange()), resType, + (type & 2) ? 0 : -1); + + mlir::Value ptr = emittedE ? emittedE : emitScalarExpr(e); + assert(mlir::isa<cir::PointerType>(ptr.getType()) && + "Non-pointer passed to __builtin_object_size?"); + + assert(!cir::MissingFeatures::countedBySize()); + + // Extract the min/max mode from type. CIR only supports type 0 + // (max, whole object) and type 2 (min, whole object), not type 1 or 3 + // (closest subobject variants). + const bool min = ((type & 2) != 0); + // For GCC compatibility, __builtin_object_size treats NULL as unknown size. + auto op = + cir::ObjSizeOp::create(builder, getLoc(e->getSourceRange()), resType, ptr, + min, /*nullUnknown=*/true, isDynamic); + return op.getResult(); +} + +mlir::Value CIRGenFunction::evaluateOrEmitBuiltinObjectSize( + const Expr *e, unsigned type, cir::IntType resType, mlir::Value emittedE, + bool isDynamic) { + uint64_t objectSize; + if (!e->tryEvaluateObjectSize(objectSize, getContext(), type)) + return emitBuiltinObjectSize(e, type, resType, emittedE, isDynamic); + return builder.getConstInt(getLoc(e->getSourceRange()), resType, objectSize); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp new file mode 100644 index 0000000..5a9ae59 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -0,0 +1,1583 @@ +//===---- CIRGenBuiltinAArch64.cpp - Emit CIR for AArch64 builtins --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit ARM64 Builtin calls as CIR or a function call +// to be later resolved. +// +//===----------------------------------------------------------------------===// + +#include "CIRGenFunction.h" +#include "clang/CIR/MissingFeatures.h" + +// TODO(cir): once all builtins are covered, decide whether we still +// need to use LLVM intrinsics or if there's a better approach to follow. Right +// now the intrinsics are reused to make it convenient to encode all thousands +// of them and passing down to LLVM lowering. 
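As context for the emitBuiltinObjectSize lowering above: in the builtin's mode argument, bit 1 (value 2) selects the minimum answer (0 when unknown) rather than the maximum ((size_t)-1 when unknown), and bit 0 (value 1) selects the closest-subobject variants that CIR does not model yet, which is why mode 3 and side-effecting arguments fold straight to a constant. A short C/C++ sketch of the source-level behaviour, assuming the compiler can see the whole object as it can here:

#include <stdio.h>

struct S { char header[16]; char payload[48]; };

int main(void) {
  struct S s;
  // Whole-object modes: from &s there are 64 bytes either way.
  printf("%zu %zu\n", __builtin_object_size(&s, 0),
         __builtin_object_size(&s, 2));
  // From s.header, mode 0 counts to the end of the enclosing object (64),
  // while subobject mode 1 stops at the header array itself (16).
  printf("%zu %zu\n", __builtin_object_size(s.header, 0),
         __builtin_object_size(s.header, 1));
  return 0;
}

When nothing is known about the pointee, mode 0 evaluates to (size_t)-1 and mode 2 to 0, matching the constant produced by the type == 3 / side-effect path above.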
+#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsAArch64.h" + +#include "mlir/IR/Value.h" +#include "clang/AST/GlobalDecl.h" +#include "clang/Basic/Builtins.h" +#include "clang/Basic/TargetBuiltins.h" + +using namespace clang; +using namespace clang::CIRGen; +using namespace llvm; + +mlir::Value CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID, + const CallExpr *expr) { + if (builtinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 && + builtinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + assert(!cir::MissingFeatures::aarch64SVEIntrinsics()); + + switch (builtinID) { + default: + return {}; + + case SVE::BI__builtin_sve_svreinterpret_b: + case SVE::BI__builtin_sve_svreinterpret_c: + case SVE::BI__builtin_sve_svpsel_lane_b8: + case SVE::BI__builtin_sve_svpsel_lane_b16: + case SVE::BI__builtin_sve_svpsel_lane_b32: + case SVE::BI__builtin_sve_svpsel_lane_b64: + case SVE::BI__builtin_sve_svpsel_lane_c8: + case SVE::BI__builtin_sve_svpsel_lane_c16: + case SVE::BI__builtin_sve_svpsel_lane_c32: + case SVE::BI__builtin_sve_svpsel_lane_c64: + case SVE::BI__builtin_sve_svmov_b_z: + case SVE::BI__builtin_sve_svnot_b_z: + case SVE::BI__builtin_sve_svmovlb_u16: + case SVE::BI__builtin_sve_svmovlb_u32: + case SVE::BI__builtin_sve_svmovlb_u64: + case SVE::BI__builtin_sve_svmovlb_s16: + case SVE::BI__builtin_sve_svmovlb_s32: + case SVE::BI__builtin_sve_svmovlb_s64: + case SVE::BI__builtin_sve_svmovlt_u16: + case SVE::BI__builtin_sve_svmovlt_u32: + case SVE::BI__builtin_sve_svmovlt_u64: + case SVE::BI__builtin_sve_svmovlt_s16: + case SVE::BI__builtin_sve_svmovlt_s32: + case SVE::BI__builtin_sve_svmovlt_s64: + case SVE::BI__builtin_sve_svpmullt_u16: + case SVE::BI__builtin_sve_svpmullt_u64: + case SVE::BI__builtin_sve_svpmullt_n_u16: + case SVE::BI__builtin_sve_svpmullt_n_u64: + case SVE::BI__builtin_sve_svpmullb_u16: + case SVE::BI__builtin_sve_svpmullb_u64: + case SVE::BI__builtin_sve_svpmullb_n_u16: + case SVE::BI__builtin_sve_svpmullb_n_u64: + case SVE::BI__builtin_sve_svdup_n_b8: + case SVE::BI__builtin_sve_svdup_n_b16: + case SVE::BI__builtin_sve_svdup_n_b32: + case SVE::BI__builtin_sve_svdup_n_b64: + case SVE::BI__builtin_sve_svdupq_n_b8: + case SVE::BI__builtin_sve_svdupq_n_b16: + case SVE::BI__builtin_sve_svdupq_n_b32: + case SVE::BI__builtin_sve_svdupq_n_b64: + case SVE::BI__builtin_sve_svdupq_n_u8: + case SVE::BI__builtin_sve_svdupq_n_s8: + case SVE::BI__builtin_sve_svdupq_n_u64: + case SVE::BI__builtin_sve_svdupq_n_f64: + case SVE::BI__builtin_sve_svdupq_n_s64: + case SVE::BI__builtin_sve_svdupq_n_u16: + case SVE::BI__builtin_sve_svdupq_n_f16: + case SVE::BI__builtin_sve_svdupq_n_bf16: + case SVE::BI__builtin_sve_svdupq_n_s16: + case SVE::BI__builtin_sve_svdupq_n_u32: + case SVE::BI__builtin_sve_svdupq_n_f32: + case SVE::BI__builtin_sve_svdupq_n_s32: + case SVE::BI__builtin_sve_svpfalse_b: + case SVE::BI__builtin_sve_svpfalse_c: + case SVE::BI__builtin_sve_svlen_bf16: + case SVE::BI__builtin_sve_svlen_f16: + case SVE::BI__builtin_sve_svlen_f32: + case SVE::BI__builtin_sve_svlen_f64: + case SVE::BI__builtin_sve_svlen_s8: + case SVE::BI__builtin_sve_svlen_s16: + case SVE::BI__builtin_sve_svlen_s32: + case SVE::BI__builtin_sve_svlen_s64: + case SVE::BI__builtin_sve_svlen_u8: + case SVE::BI__builtin_sve_svlen_u16: + case SVE::BI__builtin_sve_svlen_u32: + case SVE::BI__builtin_sve_svlen_u64: + case 
SVE::BI__builtin_sve_svtbl2_u8: + case SVE::BI__builtin_sve_svtbl2_s8: + case SVE::BI__builtin_sve_svtbl2_u16: + case SVE::BI__builtin_sve_svtbl2_s16: + case SVE::BI__builtin_sve_svtbl2_u32: + case SVE::BI__builtin_sve_svtbl2_s32: + case SVE::BI__builtin_sve_svtbl2_u64: + case SVE::BI__builtin_sve_svtbl2_s64: + case SVE::BI__builtin_sve_svtbl2_f16: + case SVE::BI__builtin_sve_svtbl2_bf16: + case SVE::BI__builtin_sve_svtbl2_f32: + case SVE::BI__builtin_sve_svtbl2_f64: + case SVE::BI__builtin_sve_svset_neonq_s8: + case SVE::BI__builtin_sve_svset_neonq_s16: + case SVE::BI__builtin_sve_svset_neonq_s32: + case SVE::BI__builtin_sve_svset_neonq_s64: + case SVE::BI__builtin_sve_svset_neonq_u8: + case SVE::BI__builtin_sve_svset_neonq_u16: + case SVE::BI__builtin_sve_svset_neonq_u32: + case SVE::BI__builtin_sve_svset_neonq_u64: + case SVE::BI__builtin_sve_svset_neonq_f16: + case SVE::BI__builtin_sve_svset_neonq_f32: + case SVE::BI__builtin_sve_svset_neonq_f64: + case SVE::BI__builtin_sve_svset_neonq_bf16: + case SVE::BI__builtin_sve_svget_neonq_s8: + case SVE::BI__builtin_sve_svget_neonq_s16: + case SVE::BI__builtin_sve_svget_neonq_s32: + case SVE::BI__builtin_sve_svget_neonq_s64: + case SVE::BI__builtin_sve_svget_neonq_u8: + case SVE::BI__builtin_sve_svget_neonq_u16: + case SVE::BI__builtin_sve_svget_neonq_u32: + case SVE::BI__builtin_sve_svget_neonq_u64: + case SVE::BI__builtin_sve_svget_neonq_f16: + case SVE::BI__builtin_sve_svget_neonq_f32: + case SVE::BI__builtin_sve_svget_neonq_f64: + case SVE::BI__builtin_sve_svget_neonq_bf16: + case SVE::BI__builtin_sve_svdup_neonq_s8: + case SVE::BI__builtin_sve_svdup_neonq_s16: + case SVE::BI__builtin_sve_svdup_neonq_s32: + case SVE::BI__builtin_sve_svdup_neonq_s64: + case SVE::BI__builtin_sve_svdup_neonq_u8: + case SVE::BI__builtin_sve_svdup_neonq_u16: + case SVE::BI__builtin_sve_svdup_neonq_u32: + case SVE::BI__builtin_sve_svdup_neonq_u64: + case SVE::BI__builtin_sve_svdup_neonq_f16: + case SVE::BI__builtin_sve_svdup_neonq_f32: + case SVE::BI__builtin_sve_svdup_neonq_f64: + case SVE::BI__builtin_sve_svdup_neonq_bf16: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + // Unreachable: All cases in the switch above return. +} + +mlir::Value CIRGenFunction::emitAArch64SMEBuiltinExpr(unsigned builtinID, + const CallExpr *expr) { + assert(!cir::MissingFeatures::aarch64SMEIntrinsics()); + + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; +} + +// Some intrinsics are equivalent for codegen. 
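For instance, the table that follows maps the FP16-specific NEON builtin IDs onto their generic counterparts, so a source-level vabs_f16 needs only one lowering path. A small arm_neon.h usage sketch, assuming an AArch64 target compiled with FP16 vector arithmetic enabled (e.g. -march=armv8.2-a+fp16):

#include <arm_neon.h>

float16x4_t abs_half4(float16x4_t v) {
  // This reaches codegen as NEON::BI__builtin_neon_vabs_f16, which the
  // equivalence table below remaps to NEON::BI__builtin_neon_vabs_v.
  return vabs_f16(v);
}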
+static const std::pair<unsigned, unsigned> neonEquivalentIntrinsicMap[] = { + { + NEON::BI__builtin_neon_splat_lane_bf16, + NEON::BI__builtin_neon_splat_lane_v, + }, + { + NEON::BI__builtin_neon_splat_laneq_bf16, + NEON::BI__builtin_neon_splat_laneq_v, + }, + { + NEON::BI__builtin_neon_splatq_lane_bf16, + NEON::BI__builtin_neon_splatq_lane_v, + }, + { + NEON::BI__builtin_neon_splatq_laneq_bf16, + NEON::BI__builtin_neon_splatq_laneq_v, + }, + { + NEON::BI__builtin_neon_vabd_f16, + NEON::BI__builtin_neon_vabd_v, + }, + { + NEON::BI__builtin_neon_vabdq_f16, + NEON::BI__builtin_neon_vabdq_v, + }, + { + NEON::BI__builtin_neon_vabs_f16, + NEON::BI__builtin_neon_vabs_v, + }, + { + NEON::BI__builtin_neon_vabsq_f16, + NEON::BI__builtin_neon_vabsq_v, + }, + { + NEON::BI__builtin_neon_vcage_f16, + NEON::BI__builtin_neon_vcage_v, + }, + { + NEON::BI__builtin_neon_vcageq_f16, + NEON::BI__builtin_neon_vcageq_v, + }, + { + NEON::BI__builtin_neon_vcagt_f16, + NEON::BI__builtin_neon_vcagt_v, + }, + { + NEON::BI__builtin_neon_vcagtq_f16, + NEON::BI__builtin_neon_vcagtq_v, + }, + { + NEON::BI__builtin_neon_vcale_f16, + NEON::BI__builtin_neon_vcale_v, + }, + { + NEON::BI__builtin_neon_vcaleq_f16, + NEON::BI__builtin_neon_vcaleq_v, + }, + { + NEON::BI__builtin_neon_vcalt_f16, + NEON::BI__builtin_neon_vcalt_v, + }, + { + NEON::BI__builtin_neon_vcaltq_f16, + NEON::BI__builtin_neon_vcaltq_v, + }, + { + NEON::BI__builtin_neon_vceqz_f16, + NEON::BI__builtin_neon_vceqz_v, + }, + { + NEON::BI__builtin_neon_vceqzq_f16, + NEON::BI__builtin_neon_vceqzq_v, + }, + { + NEON::BI__builtin_neon_vcgez_f16, + NEON::BI__builtin_neon_vcgez_v, + }, + { + NEON::BI__builtin_neon_vcgezq_f16, + NEON::BI__builtin_neon_vcgezq_v, + }, + { + NEON::BI__builtin_neon_vcgtz_f16, + NEON::BI__builtin_neon_vcgtz_v, + }, + { + NEON::BI__builtin_neon_vcgtzq_f16, + NEON::BI__builtin_neon_vcgtzq_v, + }, + { + NEON::BI__builtin_neon_vclez_f16, + NEON::BI__builtin_neon_vclez_v, + }, + { + NEON::BI__builtin_neon_vclezq_f16, + NEON::BI__builtin_neon_vclezq_v, + }, + { + NEON::BI__builtin_neon_vcltz_f16, + NEON::BI__builtin_neon_vcltz_v, + }, + { + NEON::BI__builtin_neon_vcltzq_f16, + NEON::BI__builtin_neon_vcltzq_v, + }, + { + NEON::BI__builtin_neon_vfma_f16, + NEON::BI__builtin_neon_vfma_v, + }, + { + NEON::BI__builtin_neon_vfma_lane_f16, + NEON::BI__builtin_neon_vfma_lane_v, + }, + { + NEON::BI__builtin_neon_vfma_laneq_f16, + NEON::BI__builtin_neon_vfma_laneq_v, + }, + { + NEON::BI__builtin_neon_vfmaq_f16, + NEON::BI__builtin_neon_vfmaq_v, + }, + { + NEON::BI__builtin_neon_vfmaq_lane_f16, + NEON::BI__builtin_neon_vfmaq_lane_v, + }, + { + NEON::BI__builtin_neon_vfmaq_laneq_f16, + NEON::BI__builtin_neon_vfmaq_laneq_v, + }, + {NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v}, + {NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v}, + {NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v}, + {NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v}, + {NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v}, + {NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v}, + {NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v}, + {NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v}, + {NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v}, + {NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v}, + {NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v}, + 
{NEON::BI__builtin_neon_vld1q_lane_bf16, + NEON::BI__builtin_neon_vld1q_lane_v}, + {NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v}, + {NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v}, + {NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v}, + {NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v}, + {NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v}, + {NEON::BI__builtin_neon_vld2q_lane_bf16, + NEON::BI__builtin_neon_vld2q_lane_v}, + {NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v}, + {NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v}, + {NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v}, + {NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v}, + {NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v}, + {NEON::BI__builtin_neon_vld3q_lane_bf16, + NEON::BI__builtin_neon_vld3q_lane_v}, + {NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v}, + {NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v}, + {NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v}, + {NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v}, + {NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v}, + {NEON::BI__builtin_neon_vld4q_lane_bf16, + NEON::BI__builtin_neon_vld4q_lane_v}, + { + NEON::BI__builtin_neon_vmax_f16, + NEON::BI__builtin_neon_vmax_v, + }, + { + NEON::BI__builtin_neon_vmaxnm_f16, + NEON::BI__builtin_neon_vmaxnm_v, + }, + { + NEON::BI__builtin_neon_vmaxnmq_f16, + NEON::BI__builtin_neon_vmaxnmq_v, + }, + { + NEON::BI__builtin_neon_vmaxq_f16, + NEON::BI__builtin_neon_vmaxq_v, + }, + { + NEON::BI__builtin_neon_vmin_f16, + NEON::BI__builtin_neon_vmin_v, + }, + { + NEON::BI__builtin_neon_vminnm_f16, + NEON::BI__builtin_neon_vminnm_v, + }, + { + NEON::BI__builtin_neon_vminnmq_f16, + NEON::BI__builtin_neon_vminnmq_v, + }, + { + NEON::BI__builtin_neon_vminq_f16, + NEON::BI__builtin_neon_vminq_v, + }, + { + NEON::BI__builtin_neon_vmulx_f16, + NEON::BI__builtin_neon_vmulx_v, + }, + { + NEON::BI__builtin_neon_vmulxq_f16, + NEON::BI__builtin_neon_vmulxq_v, + }, + { + NEON::BI__builtin_neon_vpadd_f16, + NEON::BI__builtin_neon_vpadd_v, + }, + { + NEON::BI__builtin_neon_vpaddq_f16, + NEON::BI__builtin_neon_vpaddq_v, + }, + { + NEON::BI__builtin_neon_vpmax_f16, + NEON::BI__builtin_neon_vpmax_v, + }, + { + NEON::BI__builtin_neon_vpmaxnm_f16, + NEON::BI__builtin_neon_vpmaxnm_v, + }, + { + NEON::BI__builtin_neon_vpmaxnmq_f16, + NEON::BI__builtin_neon_vpmaxnmq_v, + }, + { + NEON::BI__builtin_neon_vpmaxq_f16, + NEON::BI__builtin_neon_vpmaxq_v, + }, + { + NEON::BI__builtin_neon_vpmin_f16, + NEON::BI__builtin_neon_vpmin_v, + }, + { + NEON::BI__builtin_neon_vpminnm_f16, + NEON::BI__builtin_neon_vpminnm_v, + }, + { + NEON::BI__builtin_neon_vpminnmq_f16, + NEON::BI__builtin_neon_vpminnmq_v, + }, + { + NEON::BI__builtin_neon_vpminq_f16, + NEON::BI__builtin_neon_vpminq_v, + }, + { + NEON::BI__builtin_neon_vrecpe_f16, + NEON::BI__builtin_neon_vrecpe_v, + }, + { + NEON::BI__builtin_neon_vrecpeq_f16, + NEON::BI__builtin_neon_vrecpeq_v, + }, + { + NEON::BI__builtin_neon_vrecps_f16, + NEON::BI__builtin_neon_vrecps_v, + }, + { + NEON::BI__builtin_neon_vrecpsq_f16, + NEON::BI__builtin_neon_vrecpsq_v, + }, + { + NEON::BI__builtin_neon_vrnd_f16, + NEON::BI__builtin_neon_vrnd_v, + }, + { + NEON::BI__builtin_neon_vrnda_f16, + NEON::BI__builtin_neon_vrnda_v, + }, 
+ { + NEON::BI__builtin_neon_vrndaq_f16, + NEON::BI__builtin_neon_vrndaq_v, + }, + { + NEON::BI__builtin_neon_vrndi_f16, + NEON::BI__builtin_neon_vrndi_v, + }, + { + NEON::BI__builtin_neon_vrndiq_f16, + NEON::BI__builtin_neon_vrndiq_v, + }, + { + NEON::BI__builtin_neon_vrndm_f16, + NEON::BI__builtin_neon_vrndm_v, + }, + { + NEON::BI__builtin_neon_vrndmq_f16, + NEON::BI__builtin_neon_vrndmq_v, + }, + { + NEON::BI__builtin_neon_vrndn_f16, + NEON::BI__builtin_neon_vrndn_v, + }, + { + NEON::BI__builtin_neon_vrndnq_f16, + NEON::BI__builtin_neon_vrndnq_v, + }, + { + NEON::BI__builtin_neon_vrndp_f16, + NEON::BI__builtin_neon_vrndp_v, + }, + { + NEON::BI__builtin_neon_vrndpq_f16, + NEON::BI__builtin_neon_vrndpq_v, + }, + { + NEON::BI__builtin_neon_vrndq_f16, + NEON::BI__builtin_neon_vrndq_v, + }, + { + NEON::BI__builtin_neon_vrndx_f16, + NEON::BI__builtin_neon_vrndx_v, + }, + { + NEON::BI__builtin_neon_vrndxq_f16, + NEON::BI__builtin_neon_vrndxq_v, + }, + { + NEON::BI__builtin_neon_vrsqrte_f16, + NEON::BI__builtin_neon_vrsqrte_v, + }, + { + NEON::BI__builtin_neon_vrsqrteq_f16, + NEON::BI__builtin_neon_vrsqrteq_v, + }, + { + NEON::BI__builtin_neon_vrsqrts_f16, + NEON::BI__builtin_neon_vrsqrts_v, + }, + { + NEON::BI__builtin_neon_vrsqrtsq_f16, + NEON::BI__builtin_neon_vrsqrtsq_v, + }, + { + NEON::BI__builtin_neon_vsqrt_f16, + NEON::BI__builtin_neon_vsqrt_v, + }, + { + NEON::BI__builtin_neon_vsqrtq_f16, + NEON::BI__builtin_neon_vsqrtq_v, + }, + {NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v}, + {NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v}, + {NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v}, + {NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v}, + {NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v}, + {NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v}, + {NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v}, + {NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v}, + {NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v}, + {NEON::BI__builtin_neon_vst1q_lane_bf16, + NEON::BI__builtin_neon_vst1q_lane_v}, + {NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v}, + {NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v}, + {NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v}, + {NEON::BI__builtin_neon_vst2q_lane_bf16, + NEON::BI__builtin_neon_vst2q_lane_v}, + {NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v}, + {NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v}, + {NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v}, + {NEON::BI__builtin_neon_vst3q_lane_bf16, + NEON::BI__builtin_neon_vst3q_lane_v}, + {NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v}, + {NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v}, + {NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v}, + {NEON::BI__builtin_neon_vst4q_lane_bf16, + NEON::BI__builtin_neon_vst4q_lane_v}, + // The mangling rules cause us to have one ID for each type for + // vldap1(q)_lane and vstl1(q)_lane, but codegen is equivalent for all of + // them. Choose an arbitrary one to be handled as tha canonical variation. 
+ {NEON::BI__builtin_neon_vldap1_lane_u64, + NEON::BI__builtin_neon_vldap1_lane_s64}, + {NEON::BI__builtin_neon_vldap1_lane_f64, + NEON::BI__builtin_neon_vldap1_lane_s64}, + {NEON::BI__builtin_neon_vldap1_lane_p64, + NEON::BI__builtin_neon_vldap1_lane_s64}, + {NEON::BI__builtin_neon_vldap1q_lane_u64, + NEON::BI__builtin_neon_vldap1q_lane_s64}, + {NEON::BI__builtin_neon_vldap1q_lane_f64, + NEON::BI__builtin_neon_vldap1q_lane_s64}, + {NEON::BI__builtin_neon_vldap1q_lane_p64, + NEON::BI__builtin_neon_vldap1q_lane_s64}, + {NEON::BI__builtin_neon_vstl1_lane_u64, + NEON::BI__builtin_neon_vstl1_lane_s64}, + {NEON::BI__builtin_neon_vstl1_lane_f64, + NEON::BI__builtin_neon_vstl1_lane_s64}, + {NEON::BI__builtin_neon_vstl1_lane_p64, + NEON::BI__builtin_neon_vstl1_lane_s64}, + {NEON::BI__builtin_neon_vstl1q_lane_u64, + NEON::BI__builtin_neon_vstl1q_lane_s64}, + {NEON::BI__builtin_neon_vstl1q_lane_f64, + NEON::BI__builtin_neon_vstl1q_lane_s64}, + {NEON::BI__builtin_neon_vstl1q_lane_p64, + NEON::BI__builtin_neon_vstl1q_lane_s64}, +}; + +mlir::Value +CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, + ReturnValueSlot returnValue, + llvm::Triple::ArchType arch) { + if (builtinID >= clang::AArch64::FirstSVEBuiltin && + builtinID <= clang::AArch64::LastSVEBuiltin) + return emitAArch64SVEBuiltinExpr(builtinID, expr); + + if (builtinID >= clang::AArch64::FirstSMEBuiltin && + builtinID <= clang::AArch64::LastSMEBuiltin) + return emitAArch64SMEBuiltinExpr(builtinID, expr); + + if (builtinID == Builtin::BI__builtin_cpu_supports) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + switch (builtinID) { + default: + break; + case clang::AArch64::BI__builtin_arm_nop: + case clang::AArch64::BI__builtin_arm_yield: + case clang::AArch64::BI__yield: + case clang::AArch64::BI__builtin_arm_wfe: + case clang::AArch64::BI__wfe: + case clang::AArch64::BI__builtin_arm_wfi: + case clang::AArch64::BI__wfi: + case clang::AArch64::BI__builtin_arm_sev: + case clang::AArch64::BI__sev: + case clang::AArch64::BI__builtin_arm_sevl: + case clang::AArch64::BI__sevl: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == clang::AArch64::BI__builtin_arm_trap) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == clang::AArch64::BI__builtin_arm_get_sme_state) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == clang::AArch64::BI__builtin_arm_rbit) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + if (builtinID == clang::AArch64::BI__builtin_arm_rbit64) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == clang::AArch64::BI__builtin_arm_clz || + builtinID == clang::AArch64::BI__builtin_arm_clz64) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == 
clang::AArch64::BI__builtin_arm_cls) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + if (builtinID == clang::AArch64::BI__builtin_arm_cls64) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == clang::AArch64::BI__builtin_arm_rint32zf || + builtinID == clang::AArch64::BI__builtin_arm_rint32z) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == clang::AArch64::BI__builtin_arm_rint64zf || + builtinID == clang::AArch64::BI__builtin_arm_rint64z) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == clang::AArch64::BI__builtin_arm_rint32xf || + builtinID == clang::AArch64::BI__builtin_arm_rint32x) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == clang::AArch64::BI__builtin_arm_rint64xf || + builtinID == clang::AArch64::BI__builtin_arm_rint64x) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == clang::AArch64::BI__builtin_arm_jcvt) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == clang::AArch64::BI__builtin_arm_ld64b || + builtinID == clang::AArch64::BI__builtin_arm_st64b || + builtinID == clang::AArch64::BI__builtin_arm_st64bv || + builtinID == clang::AArch64::BI__builtin_arm_st64bv0) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == clang::AArch64::BI__builtin_arm_rndr || + builtinID == clang::AArch64::BI__builtin_arm_rndrrs) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == clang::AArch64::BI__clear_cache) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if ((builtinID == clang::AArch64::BI__builtin_arm_ldrex || + builtinID == clang::AArch64::BI__builtin_arm_ldaex) && + getContext().getTypeSize(expr->getType()) == 128) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + if (builtinID == clang::AArch64::BI__builtin_arm_ldrex || + builtinID == clang::AArch64::BI__builtin_arm_ldaex) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if ((builtinID == clang::AArch64::BI__builtin_arm_strex || + builtinID == clang::AArch64::BI__builtin_arm_stlex) && + getContext().getTypeSize(expr->getArg(0)->getType()) == 128) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + 
getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == clang::AArch64::BI__builtin_arm_strex || + builtinID == clang::AArch64::BI__builtin_arm_stlex) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == clang::AArch64::BI__getReg) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == clang::AArch64::BI__break) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == clang::AArch64::BI__builtin_arm_clrex) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == clang::AArch64::BI_ReadWriteBarrier) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + // CRC32 + Intrinsic::ID crcIntrinsicID = Intrinsic::not_intrinsic; + switch (builtinID) { + case clang::AArch64::BI__builtin_arm_crc32b: + crcIntrinsicID = Intrinsic::aarch64_crc32b; + break; + case clang::AArch64::BI__builtin_arm_crc32cb: + crcIntrinsicID = Intrinsic::aarch64_crc32cb; + break; + case clang::AArch64::BI__builtin_arm_crc32h: + crcIntrinsicID = Intrinsic::aarch64_crc32h; + break; + case clang::AArch64::BI__builtin_arm_crc32ch: + crcIntrinsicID = Intrinsic::aarch64_crc32ch; + break; + case clang::AArch64::BI__builtin_arm_crc32w: + crcIntrinsicID = Intrinsic::aarch64_crc32w; + break; + case clang::AArch64::BI__builtin_arm_crc32cw: + crcIntrinsicID = Intrinsic::aarch64_crc32cw; + break; + case clang::AArch64::BI__builtin_arm_crc32d: + crcIntrinsicID = Intrinsic::aarch64_crc32x; + break; + case clang::AArch64::BI__builtin_arm_crc32cd: + crcIntrinsicID = Intrinsic::aarch64_crc32cx; + break; + } + + if (crcIntrinsicID != Intrinsic::not_intrinsic) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + // Memory Operations (MOPS) + if (builtinID == AArch64::BI__builtin_arm_mops_memset_tag) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + // Memory Tagging Extensions (MTE) Intrinsics + Intrinsic::ID mteIntrinsicID = Intrinsic::not_intrinsic; + switch (builtinID) { + case clang::AArch64::BI__builtin_arm_irg: + mteIntrinsicID = Intrinsic::aarch64_irg; + break; + case clang::AArch64::BI__builtin_arm_addg: + mteIntrinsicID = Intrinsic::aarch64_addg; + break; + case clang::AArch64::BI__builtin_arm_gmi: + mteIntrinsicID = Intrinsic::aarch64_gmi; + break; + case clang::AArch64::BI__builtin_arm_ldg: + mteIntrinsicID = Intrinsic::aarch64_ldg; + break; + case clang::AArch64::BI__builtin_arm_stg: + mteIntrinsicID = Intrinsic::aarch64_stg; + break; + case clang::AArch64::BI__builtin_arm_subp: + mteIntrinsicID = Intrinsic::aarch64_subp; + break; + } + + if (mteIntrinsicID != Intrinsic::not_intrinsic) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == 
clang::AArch64::BI__builtin_arm_rsr || + builtinID == clang::AArch64::BI__builtin_arm_rsr64 || + builtinID == clang::AArch64::BI__builtin_arm_rsr128 || + builtinID == clang::AArch64::BI__builtin_arm_rsrp || + builtinID == clang::AArch64::BI__builtin_arm_wsr || + builtinID == clang::AArch64::BI__builtin_arm_wsr64 || + builtinID == clang::AArch64::BI__builtin_arm_wsr128 || + builtinID == clang::AArch64::BI__builtin_arm_wsrp) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == clang::AArch64::BI_ReadStatusReg || + builtinID == clang::AArch64::BI_WriteStatusReg || + builtinID == clang::AArch64::BI__sys) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == clang::AArch64::BI_AddressOfReturnAddress) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == clang::AArch64::BI__builtin_sponentry) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == clang::AArch64::BI__mulh || + builtinID == clang::AArch64::BI__umulh) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == AArch64::BI__writex18byte || + builtinID == AArch64::BI__writex18word || + builtinID == AArch64::BI__writex18dword || + builtinID == AArch64::BI__writex18qword) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == AArch64::BI__readx18byte || + builtinID == AArch64::BI__readx18word || + builtinID == AArch64::BI__readx18dword || + builtinID == AArch64::BI__readx18qword) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == AArch64::BI__addx18byte || + builtinID == AArch64::BI__addx18word || + builtinID == AArch64::BI__addx18dword || + builtinID == AArch64::BI__addx18qword || + builtinID == AArch64::BI__incx18byte || + builtinID == AArch64::BI__incx18word || + builtinID == AArch64::BI__incx18dword || + builtinID == AArch64::BI__incx18qword) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == AArch64::BI_CopyDoubleFromInt64 || + builtinID == AArch64::BI_CopyFloatFromInt32 || + builtinID == AArch64::BI_CopyInt32FromFloat || + builtinID == AArch64::BI_CopyInt64FromDouble) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == AArch64::BI_CountLeadingOnes || + builtinID == AArch64::BI_CountLeadingOnes64 || + builtinID == AArch64::BI_CountLeadingZeros || + builtinID == AArch64::BI_CountLeadingZeros64) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == 
AArch64::BI_CountLeadingSigns || + builtinID == AArch64::BI_CountLeadingSigns64) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == AArch64::BI_CountOneBits || + builtinID == AArch64::BI_CountOneBits64) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == AArch64::BI__prefetch) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == AArch64::BI__hlt) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + if (builtinID == NEON::BI__builtin_neon_vcvth_bf16_f32) { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + // Handle MSVC intrinsics before argument evaluation to prevent double + // evaluation. + assert(!cir::MissingFeatures::msvcBuiltins()); + + // Some intrinsics are equivalent - if they are use the base intrinsic ID. + auto it = llvm::find_if(neonEquivalentIntrinsicMap, [builtinID](auto &p) { + return p.first == builtinID; + }); + if (it != end(neonEquivalentIntrinsicMap)) + builtinID = it->second; + + // Find out if any arguments are required to be integer constant + // expressions. + assert(!cir::MissingFeatures::handleBuiltinICEArguments()); + + assert(!cir::MissingFeatures::neonSISDIntrinsics()); + + // Handle non-overloaded intrinsics first. 
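Nearly every unimplemented case in this file, including the non-overloaded switch that follows, reports the same "unimplemented AArch64 builtin call" diagnostic. Purely as a hypothetical refactoring sketch (not something the patch introduces, and assuming cgm and getContext() remain reachable from such a helper, e.g. by making it a CIRGenFunction member instead), the repetition could be folded into one place:

// Hypothetical helper: report an AArch64 builtin as not-yet-implemented
// and yield a null value, mirroring the inline pattern used throughout.
static mlir::Value emitAArch64BuiltinNYI(CIRGenFunction &cgf,
                                         const CallExpr *expr,
                                         unsigned builtinID) {
  cgf.cgm.errorNYI(expr->getSourceRange(),
                   std::string("unimplemented AArch64 builtin call: ") +
                       cgf.getContext().BuiltinInfo.getName(builtinID));
  return {};
}

Each `return emitAArch64BuiltinNYI(*this, expr, builtinID);` would then stand in for the four-line blocks above and below.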
+ switch (builtinID) { + default: + break; + case NEON::BI__builtin_neon_vabsh_f16: + case NEON::BI__builtin_neon_vaddq_p128: + case NEON::BI__builtin_neon_vldrq_p128: + case NEON::BI__builtin_neon_vstrq_p128: + case NEON::BI__builtin_neon_vcvts_f32_u32: + case NEON::BI__builtin_neon_vcvtd_f64_u64: + case NEON::BI__builtin_neon_vcvts_f32_s32: + case NEON::BI__builtin_neon_vcvtd_f64_s64: + case NEON::BI__builtin_neon_vcvth_f16_u16: + case NEON::BI__builtin_neon_vcvth_f16_u32: + case NEON::BI__builtin_neon_vcvth_f16_u64: + case NEON::BI__builtin_neon_vcvth_f16_s16: + case NEON::BI__builtin_neon_vcvth_f16_s32: + case NEON::BI__builtin_neon_vcvth_f16_s64: + case NEON::BI__builtin_neon_vcvtah_u16_f16: + case NEON::BI__builtin_neon_vcvtmh_u16_f16: + case NEON::BI__builtin_neon_vcvtnh_u16_f16: + case NEON::BI__builtin_neon_vcvtph_u16_f16: + case NEON::BI__builtin_neon_vcvth_u16_f16: + case NEON::BI__builtin_neon_vcvtah_s16_f16: + case NEON::BI__builtin_neon_vcvtmh_s16_f16: + case NEON::BI__builtin_neon_vcvtnh_s16_f16: + case NEON::BI__builtin_neon_vcvtph_s16_f16: + case NEON::BI__builtin_neon_vcvth_s16_f16: + case NEON::BI__builtin_neon_vcaleh_f16: + case NEON::BI__builtin_neon_vcalth_f16: + case NEON::BI__builtin_neon_vcageh_f16: + case NEON::BI__builtin_neon_vcagth_f16: + case NEON::BI__builtin_neon_vcvth_n_s16_f16: + case NEON::BI__builtin_neon_vcvth_n_u16_f16: + case NEON::BI__builtin_neon_vcvth_n_f16_s16: + case NEON::BI__builtin_neon_vcvth_n_f16_u16: + case NEON::BI__builtin_neon_vpaddd_s64: + case NEON::BI__builtin_neon_vpaddd_f64: + case NEON::BI__builtin_neon_vpadds_f32: + case NEON::BI__builtin_neon_vceqzd_s64: + case NEON::BI__builtin_neon_vceqzd_f64: + case NEON::BI__builtin_neon_vceqzs_f32: + case NEON::BI__builtin_neon_vceqzh_f16: + case NEON::BI__builtin_neon_vcgezd_s64: + case NEON::BI__builtin_neon_vcgezd_f64: + case NEON::BI__builtin_neon_vcgezs_f32: + case NEON::BI__builtin_neon_vcgezh_f16: + case NEON::BI__builtin_neon_vclezd_s64: + case NEON::BI__builtin_neon_vclezd_f64: + case NEON::BI__builtin_neon_vclezs_f32: + case NEON::BI__builtin_neon_vclezh_f16: + case NEON::BI__builtin_neon_vcgtzd_s64: + case NEON::BI__builtin_neon_vcgtzd_f64: + case NEON::BI__builtin_neon_vcgtzs_f32: + case NEON::BI__builtin_neon_vcgtzh_f16: + case NEON::BI__builtin_neon_vcltzd_s64: + case NEON::BI__builtin_neon_vcltzd_f64: + case NEON::BI__builtin_neon_vcltzs_f32: + case NEON::BI__builtin_neon_vcltzh_f16: + case NEON::BI__builtin_neon_vceqzd_u64: + case NEON::BI__builtin_neon_vceqd_f64: + case NEON::BI__builtin_neon_vcled_f64: + case NEON::BI__builtin_neon_vcltd_f64: + case NEON::BI__builtin_neon_vcged_f64: + case NEON::BI__builtin_neon_vcgtd_f64: + case NEON::BI__builtin_neon_vceqs_f32: + case NEON::BI__builtin_neon_vcles_f32: + case NEON::BI__builtin_neon_vclts_f32: + case NEON::BI__builtin_neon_vcges_f32: + case NEON::BI__builtin_neon_vcgts_f32: + case NEON::BI__builtin_neon_vceqh_f16: + case NEON::BI__builtin_neon_vcleh_f16: + case NEON::BI__builtin_neon_vclth_f16: + case NEON::BI__builtin_neon_vcgeh_f16: + case NEON::BI__builtin_neon_vcgth_f16: + case NEON::BI__builtin_neon_vceqd_s64: + case NEON::BI__builtin_neon_vceqd_u64: + case NEON::BI__builtin_neon_vcgtd_s64: + case NEON::BI__builtin_neon_vcgtd_u64: + case NEON::BI__builtin_neon_vcltd_s64: + case NEON::BI__builtin_neon_vcltd_u64: + case NEON::BI__builtin_neon_vcged_u64: + case NEON::BI__builtin_neon_vcged_s64: + case NEON::BI__builtin_neon_vcled_u64: + case NEON::BI__builtin_neon_vcled_s64: + case NEON::BI__builtin_neon_vtstd_s64: + case 
NEON::BI__builtin_neon_vtstd_u64: + case NEON::BI__builtin_neon_vset_lane_i8: + case NEON::BI__builtin_neon_vset_lane_i16: + case NEON::BI__builtin_neon_vset_lane_i32: + case NEON::BI__builtin_neon_vset_lane_i64: + case NEON::BI__builtin_neon_vset_lane_bf16: + case NEON::BI__builtin_neon_vset_lane_f32: + case NEON::BI__builtin_neon_vsetq_lane_i8: + case NEON::BI__builtin_neon_vsetq_lane_i16: + case NEON::BI__builtin_neon_vsetq_lane_i32: + case NEON::BI__builtin_neon_vsetq_lane_i64: + case NEON::BI__builtin_neon_vsetq_lane_bf16: + case NEON::BI__builtin_neon_vsetq_lane_f32: + case NEON::BI__builtin_neon_vset_lane_f64: + case NEON::BI__builtin_neon_vset_lane_mf8: + case NEON::BI__builtin_neon_vsetq_lane_mf8: + case NEON::BI__builtin_neon_vsetq_lane_f64: + case NEON::BI__builtin_neon_vget_lane_i8: + case NEON::BI__builtin_neon_vdupb_lane_i8: + case NEON::BI__builtin_neon_vgetq_lane_i8: + case NEON::BI__builtin_neon_vdupb_laneq_i8: + case NEON::BI__builtin_neon_vget_lane_mf8: + case NEON::BI__builtin_neon_vdupb_lane_mf8: + case NEON::BI__builtin_neon_vgetq_lane_mf8: + case NEON::BI__builtin_neon_vdupb_laneq_mf8: + case NEON::BI__builtin_neon_vget_lane_i16: + case NEON::BI__builtin_neon_vduph_lane_i16: + case NEON::BI__builtin_neon_vgetq_lane_i16: + case NEON::BI__builtin_neon_vduph_laneq_i16: + case NEON::BI__builtin_neon_vget_lane_i32: + case NEON::BI__builtin_neon_vdups_lane_i32: + case NEON::BI__builtin_neon_vdups_lane_f32: + case NEON::BI__builtin_neon_vgetq_lane_i32: + case NEON::BI__builtin_neon_vdups_laneq_i32: + case NEON::BI__builtin_neon_vget_lane_i64: + case NEON::BI__builtin_neon_vdupd_lane_i64: + case NEON::BI__builtin_neon_vdupd_lane_f64: + case NEON::BI__builtin_neon_vgetq_lane_i64: + case NEON::BI__builtin_neon_vdupd_laneq_i64: + case NEON::BI__builtin_neon_vget_lane_f32: + case NEON::BI__builtin_neon_vget_lane_f64: + case NEON::BI__builtin_neon_vgetq_lane_f32: + case NEON::BI__builtin_neon_vdups_laneq_f32: + case NEON::BI__builtin_neon_vgetq_lane_f64: + case NEON::BI__builtin_neon_vdupd_laneq_f64: + case NEON::BI__builtin_neon_vaddh_f16: + case NEON::BI__builtin_neon_vsubh_f16: + case NEON::BI__builtin_neon_vmulh_f16: + case NEON::BI__builtin_neon_vdivh_f16: + case NEON::BI__builtin_neon_vfmah_f16: + case NEON::BI__builtin_neon_vfmsh_f16: + case NEON::BI__builtin_neon_vaddd_s64: + case NEON::BI__builtin_neon_vaddd_u64: + case NEON::BI__builtin_neon_vsubd_s64: + case NEON::BI__builtin_neon_vsubd_u64: + case NEON::BI__builtin_neon_vqdmlalh_s16: + case NEON::BI__builtin_neon_vqdmlslh_s16: + case NEON::BI__builtin_neon_vqshlud_n_s64: + case NEON::BI__builtin_neon_vqshld_n_u64: + case NEON::BI__builtin_neon_vqshld_n_s64: + case NEON::BI__builtin_neon_vrshrd_n_u64: + case NEON::BI__builtin_neon_vrshrd_n_s64: + case NEON::BI__builtin_neon_vrsrad_n_u64: + case NEON::BI__builtin_neon_vrsrad_n_s64: + case NEON::BI__builtin_neon_vshld_n_s64: + case NEON::BI__builtin_neon_vshld_n_u64: + case NEON::BI__builtin_neon_vshrd_n_s64: + case NEON::BI__builtin_neon_vshrd_n_u64: + case NEON::BI__builtin_neon_vsrad_n_s64: + case NEON::BI__builtin_neon_vsrad_n_u64: + case NEON::BI__builtin_neon_vqdmlalh_lane_s16: + case NEON::BI__builtin_neon_vqdmlalh_laneq_s16: + case NEON::BI__builtin_neon_vqdmlslh_lane_s16: + case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: + case NEON::BI__builtin_neon_vqdmlals_s32: + case NEON::BI__builtin_neon_vqdmlsls_s32: + case NEON::BI__builtin_neon_vqdmlals_lane_s32: + case NEON::BI__builtin_neon_vqdmlals_laneq_s32: + case NEON::BI__builtin_neon_vqdmlsls_lane_s32: + case 
NEON::BI__builtin_neon_vqdmlsls_laneq_s32: + case NEON::BI__builtin_neon_vget_lane_bf16: + case NEON::BI__builtin_neon_vduph_lane_bf16: + case NEON::BI__builtin_neon_vduph_lane_f16: + case NEON::BI__builtin_neon_vgetq_lane_bf16: + case NEON::BI__builtin_neon_vduph_laneq_bf16: + case NEON::BI__builtin_neon_vduph_laneq_f16: + case NEON::BI__builtin_neon_vcvt_bf16_f32: + case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: + case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: + case clang::AArch64::BI_InterlockedAdd: + case clang::AArch64::BI_InterlockedAdd_acq: + case clang::AArch64::BI_InterlockedAdd_rel: + case clang::AArch64::BI_InterlockedAdd_nf: + case clang::AArch64::BI_InterlockedAdd64: + case clang::AArch64::BI_InterlockedAdd64_acq: + case clang::AArch64::BI_InterlockedAdd64_rel: + case clang::AArch64::BI_InterlockedAdd64_nf: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + // Not all intrinsics handled by the common case work for AArch64 yet, so only + // defer to common code if it's been added to our special map. + assert(!cir::MissingFeatures::aarch64SIMDIntrinsics()); + + assert(!cir::MissingFeatures::aarch64TblBuiltinExpr()); + + switch (builtinID) { + default: + return {}; + case NEON::BI__builtin_neon_vbsl_v: + case NEON::BI__builtin_neon_vbslq_v: + case NEON::BI__builtin_neon_vfma_lane_v: + case NEON::BI__builtin_neon_vfmaq_lane_v: + case NEON::BI__builtin_neon_vfma_laneq_v: + case NEON::BI__builtin_neon_vfmaq_laneq_v: + case NEON::BI__builtin_neon_vfmah_lane_f16: + case NEON::BI__builtin_neon_vfmas_lane_f32: + case NEON::BI__builtin_neon_vfmah_laneq_f16: + case NEON::BI__builtin_neon_vfmas_laneq_f32: + case NEON::BI__builtin_neon_vfmad_lane_f64: + case NEON::BI__builtin_neon_vfmad_laneq_f64: + case NEON::BI__builtin_neon_vmull_v: + case NEON::BI__builtin_neon_vmax_v: + case NEON::BI__builtin_neon_vmaxq_v: + case NEON::BI__builtin_neon_vmaxh_f16: + case NEON::BI__builtin_neon_vmin_v: + case NEON::BI__builtin_neon_vminq_v: + case NEON::BI__builtin_neon_vminh_f16: + case NEON::BI__builtin_neon_vabd_v: + case NEON::BI__builtin_neon_vabdq_v: + case NEON::BI__builtin_neon_vpadal_v: + case NEON::BI__builtin_neon_vpadalq_v: + case NEON::BI__builtin_neon_vpmin_v: + case NEON::BI__builtin_neon_vpminq_v: + case NEON::BI__builtin_neon_vpmax_v: + case NEON::BI__builtin_neon_vpmaxq_v: + case NEON::BI__builtin_neon_vminnm_v: + case NEON::BI__builtin_neon_vminnmq_v: + case NEON::BI__builtin_neon_vminnmh_f16: + case NEON::BI__builtin_neon_vmaxnm_v: + case NEON::BI__builtin_neon_vmaxnmq_v: + case NEON::BI__builtin_neon_vmaxnmh_f16: + case NEON::BI__builtin_neon_vrecpss_f32: + case NEON::BI__builtin_neon_vrecpsd_f64: + case NEON::BI__builtin_neon_vrecpsh_f16: + case NEON::BI__builtin_neon_vqshrun_n_v: + case NEON::BI__builtin_neon_vqrshrun_n_v: + case NEON::BI__builtin_neon_vqshrn_n_v: + case NEON::BI__builtin_neon_vrshrn_n_v: + case NEON::BI__builtin_neon_vqrshrn_n_v: + case NEON::BI__builtin_neon_vrndah_f16: + case NEON::BI__builtin_neon_vrnda_v: + case NEON::BI__builtin_neon_vrndaq_v: + case NEON::BI__builtin_neon_vrndih_f16: + case NEON::BI__builtin_neon_vrndmh_f16: + case NEON::BI__builtin_neon_vrndm_v: + case NEON::BI__builtin_neon_vrndmq_v: + case NEON::BI__builtin_neon_vrndnh_f16: + case NEON::BI__builtin_neon_vrndn_v: + case NEON::BI__builtin_neon_vrndnq_v: + case NEON::BI__builtin_neon_vrndns_f32: + case NEON::BI__builtin_neon_vrndph_f16: + case NEON::BI__builtin_neon_vrndp_v: + 
case NEON::BI__builtin_neon_vrndpq_v: + case NEON::BI__builtin_neon_vrndxh_f16: + case NEON::BI__builtin_neon_vrndx_v: + case NEON::BI__builtin_neon_vrndxq_v: + case NEON::BI__builtin_neon_vrndh_f16: + case NEON::BI__builtin_neon_vrnd32x_f32: + case NEON::BI__builtin_neon_vrnd32xq_f32: + case NEON::BI__builtin_neon_vrnd32x_f64: + case NEON::BI__builtin_neon_vrnd32xq_f64: + case NEON::BI__builtin_neon_vrnd32z_f32: + case NEON::BI__builtin_neon_vrnd32zq_f32: + case NEON::BI__builtin_neon_vrnd32z_f64: + case NEON::BI__builtin_neon_vrnd32zq_f64: + case NEON::BI__builtin_neon_vrnd64x_f32: + case NEON::BI__builtin_neon_vrnd64xq_f32: + case NEON::BI__builtin_neon_vrnd64x_f64: + case NEON::BI__builtin_neon_vrnd64xq_f64: + case NEON::BI__builtin_neon_vrnd64z_f32: + case NEON::BI__builtin_neon_vrnd64zq_f32: + case NEON::BI__builtin_neon_vrnd64z_f64: + case NEON::BI__builtin_neon_vrnd64zq_f64: + case NEON::BI__builtin_neon_vrnd_v: + case NEON::BI__builtin_neon_vrndq_v: + case NEON::BI__builtin_neon_vcvt_f64_v: + case NEON::BI__builtin_neon_vcvtq_f64_v: + case NEON::BI__builtin_neon_vcvt_f64_f32: + case NEON::BI__builtin_neon_vcvt_f32_f64: + case NEON::BI__builtin_neon_vcvt_s32_v: + case NEON::BI__builtin_neon_vcvt_u32_v: + case NEON::BI__builtin_neon_vcvt_s64_v: + case NEON::BI__builtin_neon_vcvt_u64_v: + case NEON::BI__builtin_neon_vcvt_s16_f16: + case NEON::BI__builtin_neon_vcvt_u16_f16: + case NEON::BI__builtin_neon_vcvtq_s32_v: + case NEON::BI__builtin_neon_vcvtq_u32_v: + case NEON::BI__builtin_neon_vcvtq_s64_v: + case NEON::BI__builtin_neon_vcvtq_u64_v: + case NEON::BI__builtin_neon_vcvtq_s16_f16: + case NEON::BI__builtin_neon_vcvtq_u16_f16: + case NEON::BI__builtin_neon_vcvta_s16_f16: + case NEON::BI__builtin_neon_vcvta_u16_f16: + case NEON::BI__builtin_neon_vcvta_s32_v: + case NEON::BI__builtin_neon_vcvtaq_s16_f16: + case NEON::BI__builtin_neon_vcvtaq_s32_v: + case NEON::BI__builtin_neon_vcvta_u32_v: + case NEON::BI__builtin_neon_vcvtaq_u16_f16: + case NEON::BI__builtin_neon_vcvtaq_u32_v: + case NEON::BI__builtin_neon_vcvta_s64_v: + case NEON::BI__builtin_neon_vcvtaq_s64_v: + case NEON::BI__builtin_neon_vcvta_u64_v: + case NEON::BI__builtin_neon_vcvtaq_u64_v: + case NEON::BI__builtin_neon_vcvtm_s16_f16: + case NEON::BI__builtin_neon_vcvtm_s32_v: + case NEON::BI__builtin_neon_vcvtmq_s16_f16: + case NEON::BI__builtin_neon_vcvtmq_s32_v: + case NEON::BI__builtin_neon_vcvtm_u16_f16: + case NEON::BI__builtin_neon_vcvtm_u32_v: + case NEON::BI__builtin_neon_vcvtmq_u16_f16: + case NEON::BI__builtin_neon_vcvtmq_u32_v: + case NEON::BI__builtin_neon_vcvtm_s64_v: + case NEON::BI__builtin_neon_vcvtmq_s64_v: + case NEON::BI__builtin_neon_vcvtm_u64_v: + case NEON::BI__builtin_neon_vcvtmq_u64_v: + case NEON::BI__builtin_neon_vcvtn_s16_f16: + case NEON::BI__builtin_neon_vcvtn_s32_v: + case NEON::BI__builtin_neon_vcvtnq_s16_f16: + case NEON::BI__builtin_neon_vcvtnq_s32_v: + case NEON::BI__builtin_neon_vcvtn_u16_f16: + case NEON::BI__builtin_neon_vcvtn_u32_v: + case NEON::BI__builtin_neon_vcvtnq_u16_f16: + case NEON::BI__builtin_neon_vcvtnq_u32_v: + case NEON::BI__builtin_neon_vcvtn_s64_v: + case NEON::BI__builtin_neon_vcvtnq_s64_v: + case NEON::BI__builtin_neon_vcvtn_u64_v: + case NEON::BI__builtin_neon_vcvtnq_u64_v: + case NEON::BI__builtin_neon_vcvtp_s16_f16: + case NEON::BI__builtin_neon_vcvtp_s32_v: + case NEON::BI__builtin_neon_vcvtpq_s16_f16: + case NEON::BI__builtin_neon_vcvtpq_s32_v: + case NEON::BI__builtin_neon_vcvtp_u16_f16: + case NEON::BI__builtin_neon_vcvtp_u32_v: + case 
NEON::BI__builtin_neon_vcvtpq_u16_f16: + case NEON::BI__builtin_neon_vcvtpq_u32_v: + case NEON::BI__builtin_neon_vcvtp_s64_v: + case NEON::BI__builtin_neon_vcvtpq_s64_v: + case NEON::BI__builtin_neon_vcvtp_u64_v: + case NEON::BI__builtin_neon_vcvtpq_u64_v: + case NEON::BI__builtin_neon_vmulx_v: + case NEON::BI__builtin_neon_vmulxq_v: + case NEON::BI__builtin_neon_vmulxh_lane_f16: + case NEON::BI__builtin_neon_vmulxh_laneq_f16: + case NEON::BI__builtin_neon_vmul_lane_v: + case NEON::BI__builtin_neon_vmul_laneq_v: + case NEON::BI__builtin_neon_vnegd_s64: + case NEON::BI__builtin_neon_vnegh_f16: + case NEON::BI__builtin_neon_vpmaxnm_v: + case NEON::BI__builtin_neon_vpmaxnmq_v: + case NEON::BI__builtin_neon_vpminnm_v: + case NEON::BI__builtin_neon_vpminnmq_v: + case NEON::BI__builtin_neon_vsqrth_f16: + case NEON::BI__builtin_neon_vsqrt_v: + case NEON::BI__builtin_neon_vsqrtq_v: + case NEON::BI__builtin_neon_vrbit_v: + case NEON::BI__builtin_neon_vrbitq_v: + case NEON::BI__builtin_neon_vmaxv_f16: + case NEON::BI__builtin_neon_vmaxvq_f16: + case NEON::BI__builtin_neon_vminv_f16: + case NEON::BI__builtin_neon_vminvq_f16: + case NEON::BI__builtin_neon_vmaxnmv_f16: + case NEON::BI__builtin_neon_vmaxnmvq_f16: + case NEON::BI__builtin_neon_vminnmv_f16: + case NEON::BI__builtin_neon_vminnmvq_f16: + case NEON::BI__builtin_neon_vmul_n_f64: + case NEON::BI__builtin_neon_vaddlv_u8: + case NEON::BI__builtin_neon_vaddlv_u16: + case NEON::BI__builtin_neon_vaddlvq_u8: + case NEON::BI__builtin_neon_vaddlvq_u16: + case NEON::BI__builtin_neon_vaddlv_s8: + case NEON::BI__builtin_neon_vaddlv_s16: + case NEON::BI__builtin_neon_vaddlvq_s8: + case NEON::BI__builtin_neon_vaddlvq_s16: + case NEON::BI__builtin_neon_vsri_n_v: + case NEON::BI__builtin_neon_vsriq_n_v: + case NEON::BI__builtin_neon_vsli_n_v: + case NEON::BI__builtin_neon_vsliq_n_v: + case NEON::BI__builtin_neon_vsra_n_v: + case NEON::BI__builtin_neon_vsraq_n_v: + case NEON::BI__builtin_neon_vrsra_n_v: + case NEON::BI__builtin_neon_vrsraq_n_v: + case NEON::BI__builtin_neon_vld1_v: + case NEON::BI__builtin_neon_vld1q_v: + case NEON::BI__builtin_neon_vst1_v: + case NEON::BI__builtin_neon_vst1q_v: + case NEON::BI__builtin_neon_vld1_lane_v: + case NEON::BI__builtin_neon_vld1q_lane_v: + case NEON::BI__builtin_neon_vldap1_lane_s64: + case NEON::BI__builtin_neon_vldap1q_lane_s64: + case NEON::BI__builtin_neon_vld1_dup_v: + case NEON::BI__builtin_neon_vld1q_dup_v: + case NEON::BI__builtin_neon_vst1_lane_v: + case NEON::BI__builtin_neon_vst1q_lane_v: + case NEON::BI__builtin_neon_vstl1_lane_s64: + case NEON::BI__builtin_neon_vstl1q_lane_s64: + case NEON::BI__builtin_neon_vld2_v: + case NEON::BI__builtin_neon_vld2q_v: + case NEON::BI__builtin_neon_vld3_v: + case NEON::BI__builtin_neon_vld3q_v: + case NEON::BI__builtin_neon_vld4_v: + case NEON::BI__builtin_neon_vld4q_v: + case NEON::BI__builtin_neon_vld2_dup_v: + case NEON::BI__builtin_neon_vld2q_dup_v: + case NEON::BI__builtin_neon_vld3_dup_v: + case NEON::BI__builtin_neon_vld3q_dup_v: + case NEON::BI__builtin_neon_vld4_dup_v: + case NEON::BI__builtin_neon_vld4q_dup_v: + case NEON::BI__builtin_neon_vld2_lane_v: + case NEON::BI__builtin_neon_vld2q_lane_v: + case NEON::BI__builtin_neon_vld3_lane_v: + case NEON::BI__builtin_neon_vld3q_lane_v: + case NEON::BI__builtin_neon_vld4_lane_v: + case NEON::BI__builtin_neon_vld4q_lane_v: + case NEON::BI__builtin_neon_vst2_v: + case NEON::BI__builtin_neon_vst2q_v: + case NEON::BI__builtin_neon_vst2_lane_v: + case NEON::BI__builtin_neon_vst2q_lane_v: + case 
NEON::BI__builtin_neon_vst3_v: + case NEON::BI__builtin_neon_vst3q_v: + case NEON::BI__builtin_neon_vst3_lane_v: + case NEON::BI__builtin_neon_vst3q_lane_v: + case NEON::BI__builtin_neon_vst4_v: + case NEON::BI__builtin_neon_vst4q_v: + case NEON::BI__builtin_neon_vst4_lane_v: + case NEON::BI__builtin_neon_vst4q_lane_v: + case NEON::BI__builtin_neon_vtrn_v: + case NEON::BI__builtin_neon_vtrnq_v: + case NEON::BI__builtin_neon_vuzp_v: + case NEON::BI__builtin_neon_vuzpq_v: + case NEON::BI__builtin_neon_vzip_v: + case NEON::BI__builtin_neon_vzipq_v: + case NEON::BI__builtin_neon_vqtbl1q_v: + case NEON::BI__builtin_neon_vqtbl2q_v: + case NEON::BI__builtin_neon_vqtbl3q_v: + case NEON::BI__builtin_neon_vqtbl4q_v: + case NEON::BI__builtin_neon_vqtbx1q_v: + case NEON::BI__builtin_neon_vqtbx2q_v: + case NEON::BI__builtin_neon_vqtbx3q_v: + case NEON::BI__builtin_neon_vqtbx4q_v: + case NEON::BI__builtin_neon_vsqadd_v: + case NEON::BI__builtin_neon_vsqaddq_v: + case NEON::BI__builtin_neon_vuqadd_v: + case NEON::BI__builtin_neon_vuqaddq_v: + case NEON::BI__builtin_neon_vluti2_laneq_mf8: + case NEON::BI__builtin_neon_vluti2_laneq_bf16: + case NEON::BI__builtin_neon_vluti2_laneq_f16: + case NEON::BI__builtin_neon_vluti2_laneq_p16: + case NEON::BI__builtin_neon_vluti2_laneq_p8: + case NEON::BI__builtin_neon_vluti2_laneq_s16: + case NEON::BI__builtin_neon_vluti2_laneq_s8: + case NEON::BI__builtin_neon_vluti2_laneq_u16: + case NEON::BI__builtin_neon_vluti2_laneq_u8: + case NEON::BI__builtin_neon_vluti2q_laneq_mf8: + case NEON::BI__builtin_neon_vluti2q_laneq_bf16: + case NEON::BI__builtin_neon_vluti2q_laneq_f16: + case NEON::BI__builtin_neon_vluti2q_laneq_p16: + case NEON::BI__builtin_neon_vluti2q_laneq_p8: + case NEON::BI__builtin_neon_vluti2q_laneq_s16: + case NEON::BI__builtin_neon_vluti2q_laneq_s8: + case NEON::BI__builtin_neon_vluti2q_laneq_u16: + case NEON::BI__builtin_neon_vluti2q_laneq_u8: + case NEON::BI__builtin_neon_vluti2_lane_mf8: + case NEON::BI__builtin_neon_vluti2_lane_bf16: + case NEON::BI__builtin_neon_vluti2_lane_f16: + case NEON::BI__builtin_neon_vluti2_lane_p16: + case NEON::BI__builtin_neon_vluti2_lane_p8: + case NEON::BI__builtin_neon_vluti2_lane_s16: + case NEON::BI__builtin_neon_vluti2_lane_s8: + case NEON::BI__builtin_neon_vluti2_lane_u16: + case NEON::BI__builtin_neon_vluti2_lane_u8: + case NEON::BI__builtin_neon_vluti2q_lane_mf8: + case NEON::BI__builtin_neon_vluti2q_lane_bf16: + case NEON::BI__builtin_neon_vluti2q_lane_f16: + case NEON::BI__builtin_neon_vluti2q_lane_p16: + case NEON::BI__builtin_neon_vluti2q_lane_p8: + case NEON::BI__builtin_neon_vluti2q_lane_s16: + case NEON::BI__builtin_neon_vluti2q_lane_s8: + case NEON::BI__builtin_neon_vluti2q_lane_u16: + case NEON::BI__builtin_neon_vluti2q_lane_u8: + case NEON::BI__builtin_neon_vluti4q_lane_mf8: + case NEON::BI__builtin_neon_vluti4q_lane_p8: + case NEON::BI__builtin_neon_vluti4q_lane_s8: + case NEON::BI__builtin_neon_vluti4q_lane_u8: + case NEON::BI__builtin_neon_vluti4q_laneq_mf8: + case NEON::BI__builtin_neon_vluti4q_laneq_p8: + case NEON::BI__builtin_neon_vluti4q_laneq_s8: + case NEON::BI__builtin_neon_vluti4q_laneq_u8: + case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2: + case NEON::BI__builtin_neon_vluti4q_lane_f16_x2: + case NEON::BI__builtin_neon_vluti4q_lane_p16_x2: + case NEON::BI__builtin_neon_vluti4q_lane_s16_x2: + case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: + case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2: + case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2: + case 
NEON::BI__builtin_neon_vluti4q_laneq_p16_x2: + case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2: + case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: + case NEON::BI__builtin_neon_vmmlaq_f16_mf8_fpm: + case NEON::BI__builtin_neon_vmmlaq_f32_mf8_fpm: + case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm: + case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm: + case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm: + case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm: + case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm: + case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm: + case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm: + case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm: + case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm: + case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm: + case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm: + case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm: + case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm: + case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm: + case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm: + case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: + case NEON::BI__builtin_neon_vdot_f16_mf8_fpm: + case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm: + case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm: + case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm: + case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm: + case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm: + case NEON::BI__builtin_neon_vdot_f32_mf8_fpm: + case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm: + case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm: + case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm: + case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm: + case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm: + case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm: + case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm: + case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm: + case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm: + case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm: + case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm: + case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm: + case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm: + case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm: + case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm: + case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm: + case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm: + case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm: + case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm: + case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm: + case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm: + case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm: + case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm: + case NEON::BI__builtin_neon_vamin_f16: + case NEON::BI__builtin_neon_vaminq_f16: + case NEON::BI__builtin_neon_vamin_f32: + case NEON::BI__builtin_neon_vaminq_f32: + case NEON::BI__builtin_neon_vaminq_f64: + case NEON::BI__builtin_neon_vamax_f16: + case NEON::BI__builtin_neon_vamaxq_f16: + case NEON::BI__builtin_neon_vamax_f32: + case NEON::BI__builtin_neon_vamaxq_f32: + case NEON::BI__builtin_neon_vamaxq_f64: + case NEON::BI__builtin_neon_vscale_f16: + case NEON::BI__builtin_neon_vscaleq_f16: + case NEON::BI__builtin_neon_vscale_f32: + case NEON::BI__builtin_neon_vscaleq_f32: + case NEON::BI__builtin_neon_vscaleq_f64: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } + + // Unreachable: All cases in the switch above 
return. +} diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 0198a9d..62836ce 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -11,28 +11,388 @@ // //===----------------------------------------------------------------------===// +#include "CIRGenBuilder.h" #include "CIRGenFunction.h" #include "CIRGenModule.h" +#include "mlir/IR/Location.h" +#include "mlir/IR/ValueRange.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/TargetBuiltins.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" #include "clang/CIR/MissingFeatures.h" -#include "llvm/IR/IntrinsicsX86.h" using namespace clang; using namespace clang::CIRGen; +template <typename... Operands> +static mlir::Value emitIntrinsicCallOp(CIRGenBuilderTy &builder, + mlir::Location loc, const StringRef str, + const mlir::Type &resTy, + Operands &&...op) { + return cir::LLVMIntrinsicCallOp::create(builder, loc, + builder.getStringAttr(str), resTy, + std::forward<Operands>(op)...) + .getResult(); +} + +// OG has unordered comparison as a form of optimization in addition to +// ordered comparison, while CIR doesn't. +// +// This means that we can't encode the comparison code of UGT (unordered +// greater than), at least not at the CIR level. +// +// The boolean shouldInvert compensates for this. +// For example: to get to the comparison code UGT, we pass in +// emitVectorFCmp (OLE, shouldInvert = true) since OLE is the inverse of UGT. + +// There are several ways to support this otherwise: +// - register extra CmpOpKind for unordered comparison types and build the +// translation code for +// to go from CIR -> LLVM dialect. Notice we get this naturally with +// shouldInvert, benefiting from existing infrastructure, albeit having to +// generate an extra `not` at CIR). +// - Just add extra comparison code to a new VecCmpOpKind instead of +// cluttering CmpOpKind. +// - Add a boolean in VecCmpOp to indicate if it's doing unordered or ordered +// comparison +// - Just emit the intrinsics call instead of calling this helper, see how the +// LLVM lowering handles this. +static mlir::Value emitVectorFCmp(CIRGenBuilderTy &builder, + llvm::SmallVector<mlir::Value> &ops, + mlir::Location loc, cir::CmpOpKind pred, + bool shouldInvert) { + assert(!cir::MissingFeatures::cgFPOptionsRAII()); + // TODO(cir): Add isSignaling boolean once emitConstrainedFPCall implemented + assert(!cir::MissingFeatures::emitConstrainedFPCall()); + mlir::Value cmp = builder.createVecCompare(loc, pred, ops[0], ops[1]); + mlir::Value bitCast = builder.createBitcast( + shouldInvert ? builder.createNot(cmp) : cmp, ops[0].getType()); + return bitCast; +} + +static mlir::Value getMaskVecValue(CIRGenBuilderTy &builder, mlir::Location loc, + mlir::Value mask, unsigned numElems) { + auto maskTy = cir::VectorType::get( + builder.getUIntNTy(1), cast<cir::IntType>(mask.getType()).getWidth()); + mlir::Value maskVec = builder.createBitcast(mask, maskTy); + + // If we have less than 8 elements, then the starting mask was an i8 and + // we need to extract down to the right number of elements. + if (numElems < 8) { + SmallVector<mlir::Attribute, 4> indices; + mlir::Type i32Ty = builder.getSInt32Ty(); + for (auto i : llvm::seq<unsigned>(0, numElems)) + indices.push_back(cir::IntAttr::get(i32Ty, i)); + + maskVec = builder.createVecShuffle(loc, maskVec, maskVec, indices); + } + return maskVec; +} + +// Builds the VecShuffleOp for pshuflw and pshufhw x86 builtins. 
+// +// The vector is split into lanes of 8 word elements (16 bits). The lower or +// upper half of each lane, controlled by `isLow`, is shuffled in the following +// way: The immediate is truncated to 8 bits, separated into 4 2-bit fields. The +// i-th field's value represents the resulting index of the i-th element in the +// half lane after shuffling. The other half of the lane remains unchanged. +static cir::VecShuffleOp emitPshufWord(CIRGenBuilderTy &builder, + const mlir::Value vec, + const mlir::Value immediate, + const mlir::Location loc, + const bool isLow) { + uint32_t imm = CIRGenFunction::getZExtIntValueFromConstOp(immediate); + + auto vecTy = cast<cir::VectorType>(vec.getType()); + unsigned numElts = vecTy.getSize(); + + unsigned firstHalfStart = isLow ? 0 : 4; + unsigned secondHalfStart = 4 - firstHalfStart; + + // Splat the 8-bits of immediate 4 times to help the loop wrap around. + imm = (imm & 0xff) * 0x01010101; + + int64_t indices[32]; + for (unsigned l = 0; l != numElts; l += 8) { + for (unsigned i = firstHalfStart; i != firstHalfStart + 4; ++i) { + indices[l + i] = l + (imm & 3) + firstHalfStart; + imm >>= 2; + } + for (unsigned i = secondHalfStart; i != secondHalfStart + 4; ++i) + indices[l + i] = l + i; + } + + return builder.createVecShuffle(loc, vec, ArrayRef(indices, numElts)); +} + +// Builds the shuffle mask for pshufd and shufpd/shufps x86 builtins. +// The shuffle mask is written to outIndices. +static void +computeFullLaneShuffleMask(CIRGenFunction &cgf, const mlir::Value vec, + uint32_t imm, const bool isShufP, + llvm::SmallVectorImpl<int64_t> &outIndices) { + auto vecTy = cast<cir::VectorType>(vec.getType()); + unsigned numElts = vecTy.getSize(); + unsigned numLanes = cgf.cgm.getDataLayout().getTypeSizeInBits(vecTy) / 128; + unsigned numLaneElts = numElts / numLanes; + + // Splat the 8-bits of immediate 4 times to help the loop wrap around. 
+ imm = (imm & 0xff) * 0x01010101; + + for (unsigned l = 0; l != numElts; l += numLaneElts) { + for (unsigned i = 0; i != numLaneElts; ++i) { + uint32_t idx = imm % numLaneElts; + imm /= numLaneElts; + if (isShufP && i >= (numLaneElts / 2)) + idx += numElts; + outIndices[l + i] = l + idx; + } + } + + outIndices.resize(numElts); +} +static mlir::Value emitX86CompressExpand(CIRGenBuilderTy &builder, + mlir::Location loc, mlir::Value source, + mlir::Value mask, + mlir::Value inputVector, + const std::string &id) { + auto resultTy = cast<cir::VectorType>(mask.getType()); + mlir::Value maskValue = getMaskVecValue( + builder, loc, inputVector, cast<cir::VectorType>(resultTy).getSize()); + return emitIntrinsicCallOp(builder, loc, id, resultTy, + mlir::ValueRange{source, mask, maskValue}); +} + +static mlir::Value emitX86MaskAddLogic(CIRGenBuilderTy &builder, + mlir::Location loc, + const std::string &intrinsicName, + SmallVectorImpl<mlir::Value> &ops) { + + auto intTy = cast<cir::IntType>(ops[0].getType()); + unsigned numElts = intTy.getWidth(); + mlir::Value lhsVec = getMaskVecValue(builder, loc, ops[0], numElts); + mlir::Value rhsVec = getMaskVecValue(builder, loc, ops[1], numElts); + mlir::Type vecTy = lhsVec.getType(); + mlir::Value resVec = emitIntrinsicCallOp(builder, loc, intrinsicName, vecTy, + mlir::ValueRange{lhsVec, rhsVec}); + return builder.createBitcast(resVec, ops[0].getType()); +} + +static mlir::Value emitX86MaskUnpack(CIRGenBuilderTy &builder, + mlir::Location loc, + const std::string &intrinsicName, + SmallVectorImpl<mlir::Value> &ops) { + unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth(); + + // Convert both operands to mask vectors. + mlir::Value lhs = getMaskVecValue(builder, loc, ops[0], numElems); + mlir::Value rhs = getMaskVecValue(builder, loc, ops[1], numElems); + + mlir::Type i32Ty = builder.getSInt32Ty(); + + // Create indices for extracting the first half of each vector. + SmallVector<mlir::Attribute, 32> halfIndices; + for (auto i : llvm::seq<unsigned>(0, numElems / 2)) + halfIndices.push_back(cir::IntAttr::get(i32Ty, i)); + + // Extract first half of each vector. This gives better codegen than + // doing it in a single shuffle. + mlir::Value lhsHalf = builder.createVecShuffle(loc, lhs, lhs, halfIndices); + mlir::Value rhsHalf = builder.createVecShuffle(loc, rhs, rhs, halfIndices); + + // Create indices for concatenating the vectors. + // NOTE: Operands are swapped to match the intrinsic definition. + // After the half extraction, both vectors have numElems/2 elements. + // In createVecShuffle(rhsHalf, lhsHalf, indices), indices [0..numElems/2-1] + // select from rhsHalf, and indices [numElems/2..numElems-1] select from + // lhsHalf. + SmallVector<mlir::Attribute, 64> concatIndices; + for (auto i : llvm::seq<unsigned>(0, numElems)) + concatIndices.push_back(cir::IntAttr::get(i32Ty, i)); + + // Concat the vectors (RHS first, then LHS). 
+ mlir::Value res = + builder.createVecShuffle(loc, rhsHalf, lhsHalf, concatIndices); + return builder.createBitcast(res, ops[0].getType()); +} + +static mlir::Value emitX86MaskLogic(CIRGenBuilderTy &builder, + mlir::Location loc, + cir::BinOpKind binOpKind, + SmallVectorImpl<mlir::Value> &ops, + bool invertLHS = false) { + unsigned numElts = cast<cir::IntType>(ops[0].getType()).getWidth(); + mlir::Value lhs = getMaskVecValue(builder, loc, ops[0], numElts); + mlir::Value rhs = getMaskVecValue(builder, loc, ops[1], numElts); + + if (invertLHS) + lhs = builder.createNot(lhs); + return builder.createBitcast(builder.createBinop(loc, lhs, binOpKind, rhs), + ops[0].getType()); +} + +static mlir::Value emitX86MaskTest(CIRGenBuilderTy &builder, mlir::Location loc, + const std::string &intrinsicName, + SmallVectorImpl<mlir::Value> &ops) { + auto intTy = cast<cir::IntType>(ops[0].getType()); + unsigned numElts = intTy.getWidth(); + mlir::Value lhsVec = getMaskVecValue(builder, loc, ops[0], numElts); + mlir::Value rhsVec = getMaskVecValue(builder, loc, ops[1], numElts); + mlir::Type resTy = builder.getSInt32Ty(); + return emitIntrinsicCallOp(builder, loc, intrinsicName, resTy, + mlir::ValueRange{lhsVec, rhsVec}); +} + +static mlir::Value emitVecInsert(CIRGenBuilderTy &builder, mlir::Location loc, + mlir::Value vec, mlir::Value value, + mlir::Value indexOp) { + unsigned numElts = cast<cir::VectorType>(vec.getType()).getSize(); + + uint64_t index = + indexOp.getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue(); + + index &= numElts - 1; + + cir::ConstantOp indexVal = builder.getUInt64(index, loc); + + return cir::VecInsertOp::create(builder, loc, vec, value, indexVal); +} + +static mlir::Value emitX86FunnelShift(CIRGenBuilderTy &builder, + mlir::Location location, mlir::Value &op0, + mlir::Value &op1, mlir::Value &amt, + bool isRight) { + mlir::Type op0Ty = op0.getType(); + + // Amount may be scalar immediate, in which case create a splat vector. + // Funnel shift amounts are treated as modulo and types are all power-of-2 + // so we only care about the lowest log2 bits anyway. + if (amt.getType() != op0Ty) { + auto vecTy = mlir::cast<cir::VectorType>(op0Ty); + uint64_t numElems = vecTy.getSize(); + + auto amtTy = mlir::cast<cir::IntType>(amt.getType()); + auto vecElemTy = mlir::cast<cir::IntType>(vecTy.getElementType()); + + // If signed, cast to the same width but unsigned first to + // ensure zero-extension when casting to a bigger unsigned `vecElemTy`. + if (amtTy.isSigned()) { + cir::IntType unsignedAmtTy = builder.getUIntNTy(amtTy.getWidth()); + amt = builder.createIntCast(amt, unsignedAmtTy); + } + cir::IntType unsignedVecElemType = builder.getUIntNTy(vecElemTy.getWidth()); + amt = builder.createIntCast(amt, unsignedVecElemType); + amt = cir::VecSplatOp::create( + builder, location, cir::VectorType::get(unsignedVecElemType, numElems), + amt); + } + + const StringRef intrinsicName = isRight ?
"fshr" : "fshl"; + return emitIntrinsicCallOp(builder, location, intrinsicName, op0Ty, + mlir::ValueRange{op0, op1, amt}); +} + +static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc, + bool isSigned, + SmallVectorImpl<mlir::Value> &ops, + unsigned opTypePrimitiveSizeInBits) { + mlir::Type ty = cir::VectorType::get(builder.getSInt64Ty(), + opTypePrimitiveSizeInBits / 64); + mlir::Value lhs = builder.createBitcast(loc, ops[0], ty); + mlir::Value rhs = builder.createBitcast(loc, ops[1], ty); + if (isSigned) { + cir::ConstantOp shiftAmt = + builder.getConstant(loc, cir::IntAttr::get(builder.getSInt64Ty(), 32)); + cir::VecSplatOp shiftSplatVecOp = + cir::VecSplatOp::create(builder, loc, ty, shiftAmt.getResult()); + mlir::Value shiftSplatValue = shiftSplatVecOp.getResult(); + // In CIR, right-shift operations are automatically lowered to either an + // arithmetic or logical shift depending on the operand type. The purpose + // of the shifts here is to propagate the sign bit of the 32-bit input + // into the upper bits of each vector lane. + lhs = builder.createShift(loc, lhs, shiftSplatValue, true); + lhs = builder.createShift(loc, lhs, shiftSplatValue, false); + rhs = builder.createShift(loc, rhs, shiftSplatValue, true); + rhs = builder.createShift(loc, rhs, shiftSplatValue, false); + } else { + cir::ConstantOp maskScalar = builder.getConstant( + loc, cir::IntAttr::get(builder.getSInt64Ty(), 0xffffffff)); + cir::VecSplatOp mask = + cir::VecSplatOp::create(builder, loc, ty, maskScalar.getResult()); + // Clear the upper bits + lhs = builder.createAnd(loc, lhs, mask); + rhs = builder.createAnd(loc, rhs, mask); + } + return builder.createMul(loc, lhs, rhs); +} + +static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc, + llvm::SmallVector<mlir::Value> ops, + bool isSigned) { + mlir::Value op0 = ops[0]; + mlir::Value op1 = ops[1]; + + cir::VectorType ty = cast<cir::VectorType>(op0.getType()); + cir::IntType elementTy = cast<cir::IntType>(ty.getElementType()); + + uint64_t imm = CIRGenFunction::getZExtIntValueFromConstOp(ops[2]) & 0x7; + + cir::CmpOpKind pred; + switch (imm) { + case 0x0: + pred = cir::CmpOpKind::lt; + break; + case 0x1: + pred = cir::CmpOpKind::le; + break; + case 0x2: + pred = cir::CmpOpKind::gt; + break; + case 0x3: + pred = cir::CmpOpKind::ge; + break; + case 0x4: + pred = cir::CmpOpKind::eq; + break; + case 0x5: + pred = cir::CmpOpKind::ne; + break; + case 0x6: + return builder.getNullValue(ty, loc); // FALSE + case 0x7: { + llvm::APInt allOnes = llvm::APInt::getAllOnes(elementTy.getWidth()); + return cir::VecSplatOp::create( + builder, loc, ty, + builder.getConstAPInt(loc, elementTy, allOnes)); // TRUE + } + default: + llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate"); + } + + if ((!isSigned && elementTy.isSigned()) || + (isSigned && elementTy.isUnsigned())) { + elementTy = elementTy.isSigned() ? 
builder.getUIntNTy(elementTy.getWidth()) + : builder.getSIntNTy(elementTy.getWidth()); + ty = cir::VectorType::get(elementTy, ty.getSize()); + op0 = builder.createBitcast(op0, ty); + op1 = builder.createBitcast(op1, ty); + } + + return builder.createVecCompare(loc, pred, op0, op1); +} + mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, - const CallExpr *e) { + const CallExpr *expr) { if (builtinID == Builtin::BI__builtin_cpu_is) { - cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_is"); + cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_is"); return {}; } if (builtinID == Builtin::BI__builtin_cpu_supports) { - cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_supports"); + cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_supports"); return {}; } if (builtinID == Builtin::BI__builtin_cpu_init) { - cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_init"); + cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_init"); return {}; } @@ -43,26 +403,74 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, // Find out if any arguments are required to be integer constant expressions. assert(!cir::MissingFeatures::handleBuiltinICEArguments()); + // The operands of the builtin call + llvm::SmallVector<mlir::Value> ops; + + // `ICEArguments` is a bitmap indicating whether the argument at the i-th bit + // is required to be a constant integer expression. + unsigned iceArguments = 0; + ASTContext::GetBuiltinTypeError error; + getContext().GetBuiltinType(builtinID, error, &iceArguments); + assert(error == ASTContext::GE_None && "Error while getting builtin type."); + + for (auto [idx, arg] : llvm::enumerate(expr->arguments())) + ops.push_back(emitScalarOrConstFoldImmArg(iceArguments, idx, arg)); + + CIRGenBuilderTy &builder = getBuilder(); + mlir::Type voidTy = builder.getVoidTy(); + switch (builtinID) { default: return {}; - case X86::BI_mm_prefetch: case X86::BI_mm_clflush: + return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), + "x86.sse2.clflush", voidTy, ops[0]); case X86::BI_mm_lfence: + return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), + "x86.sse2.lfence", voidTy); case X86::BI_mm_pause: + return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), + "x86.sse2.pause", voidTy); case X86::BI_mm_mfence: + return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), + "x86.sse2.mfence", voidTy); case X86::BI_mm_sfence: + return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), + "x86.sse.sfence", voidTy); + case X86::BI_mm_prefetch: case X86::BI__rdtsc: - case X86::BI__builtin_ia32_rdtscp: + case X86::BI__builtin_ia32_rdtscp: { + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + } case X86::BI__builtin_ia32_lzcnt_u16: case X86::BI__builtin_ia32_lzcnt_u32: - case X86::BI__builtin_ia32_lzcnt_u64: + case X86::BI__builtin_ia32_lzcnt_u64: { + mlir::Location loc = getLoc(expr->getExprLoc()); + mlir::Value isZeroPoison = builder.getFalse(loc); + return emitIntrinsicCallOp(builder, loc, "ctlz", ops[0].getType(), + mlir::ValueRange{ops[0], isZeroPoison}); + } case X86::BI__builtin_ia32_tzcnt_u16: case X86::BI__builtin_ia32_tzcnt_u32: - case X86::BI__builtin_ia32_tzcnt_u64: + case X86::BI__builtin_ia32_tzcnt_u64: { + mlir::Location loc = getLoc(expr->getExprLoc()); + mlir::Value isZeroPoison = builder.getFalse(loc); + return emitIntrinsicCallOp(builder, loc, "cttz", ops[0].getType(), + mlir::ValueRange{ops[0], isZeroPoison}); + } case 
X86::BI__builtin_ia32_undef128: case X86::BI__builtin_ia32_undef256: case X86::BI__builtin_ia32_undef512: + // The x86 definition of "undef" is not the same as the LLVM definition + // (PR32176). We leave optimizing away an unnecessary zero constant to the + // IR optimizer and backend. + // TODO: If we had a "freeze" IR instruction to generate a fixed undef + // value, we should use that here instead of a zero. + return builder.getNullValue(convertType(expr->getType()), + getLoc(expr->getExprLoc())); case X86::BI__builtin_ia32_vec_ext_v4hi: case X86::BI__builtin_ia32_vec_ext_v16qi: case X86::BI__builtin_ia32_vec_ext_v8hi: @@ -72,7 +480,20 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_vec_ext_v32qi: case X86::BI__builtin_ia32_vec_ext_v16hi: case X86::BI__builtin_ia32_vec_ext_v8si: - case X86::BI__builtin_ia32_vec_ext_v4di: + case X86::BI__builtin_ia32_vec_ext_v4di: { + unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize(); + + uint64_t index = getZExtIntValueFromConstOp(ops[1]); + index &= numElts - 1; + + cir::ConstantOp indexVal = + builder.getUInt64(index, getLoc(expr->getExprLoc())); + + // These builtins exist so we can ensure the index is an ICE and in range. + // Otherwise we could just do this in the header file. + return cir::VecExtractOp::create(builder, getLoc(expr->getExprLoc()), + ops[0], indexVal); + } case X86::BI__builtin_ia32_vec_set_v4hi: case X86::BI__builtin_ia32_vec_set_v16qi: case X86::BI__builtin_ia32_vec_set_v8hi: @@ -81,11 +502,35 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_vec_set_v32qi: case X86::BI__builtin_ia32_vec_set_v16hi: case X86::BI__builtin_ia32_vec_set_v8si: - case X86::BI__builtin_ia32_vec_set_v4di: + case X86::BI__builtin_ia32_vec_set_v4di: { + return emitVecInsert(builder, getLoc(expr->getExprLoc()), ops[0], ops[1], + ops[2]); + } + case X86::BI__builtin_ia32_kunpckhi: + return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()), + "x86.avx512.kunpackb", ops); + case X86::BI__builtin_ia32_kunpcksi: + return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()), + "x86.avx512.kunpackw", ops); + case X86::BI__builtin_ia32_kunpckdi: + return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()), + "x86.avx512.kunpackd", ops); case X86::BI_mm_setcsr: - case X86::BI__builtin_ia32_ldmxcsr: + case X86::BI__builtin_ia32_ldmxcsr: { + mlir::Location loc = getLoc(expr->getExprLoc()); + Address tmp = createMemTemp(expr->getArg(0)->getType(), loc); + builder.createStore(loc, ops[0], tmp); + return emitIntrinsicCallOp(builder, loc, "x86.sse.ldmxcsr", + builder.getVoidTy(), tmp.getPointer()); + } case X86::BI_mm_getcsr: - case X86::BI__builtin_ia32_stmxcsr: + case X86::BI__builtin_ia32_stmxcsr: { + mlir::Location loc = getLoc(expr->getExprLoc()); + Address tmp = createMemTemp(expr->getType(), loc); + emitIntrinsicCallOp(builder, loc, "x86.sse.stmxcsr", builder.getVoidTy(), + tmp.getPointer()); + return builder.createLoad(loc, tmp); + } case X86::BI__builtin_ia32_xsave: case X86::BI__builtin_ia32_xsave64: case X86::BI__builtin_ia32_xrstor: @@ -99,9 +544,78 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_xsaves: case X86::BI__builtin_ia32_xsaves64: case X86::BI__builtin_ia32_xsetbv: - case X86::BI_xsetbv: + case X86::BI_xsetbv: { + mlir::Location loc = getLoc(expr->getExprLoc()); + StringRef intrinsicName; + switch (builtinID) { + default: + llvm_unreachable("Unexpected builtin"); + case 
X86::BI__builtin_ia32_xsave: + intrinsicName = "x86.xsave"; + break; + case X86::BI__builtin_ia32_xsave64: + intrinsicName = "x86.xsave64"; + break; + case X86::BI__builtin_ia32_xrstor: + intrinsicName = "x86.xrstor"; + break; + case X86::BI__builtin_ia32_xrstor64: + intrinsicName = "x86.xrstor64"; + break; + case X86::BI__builtin_ia32_xsaveopt: + intrinsicName = "x86.xsaveopt"; + break; + case X86::BI__builtin_ia32_xsaveopt64: + intrinsicName = "x86.xsaveopt64"; + break; + case X86::BI__builtin_ia32_xrstors: + intrinsicName = "x86.xrstors"; + break; + case X86::BI__builtin_ia32_xrstors64: + intrinsicName = "x86.xrstors64"; + break; + case X86::BI__builtin_ia32_xsavec: + intrinsicName = "x86.xsavec"; + break; + case X86::BI__builtin_ia32_xsavec64: + intrinsicName = "x86.xsavec64"; + break; + case X86::BI__builtin_ia32_xsaves: + intrinsicName = "x86.xsaves"; + break; + case X86::BI__builtin_ia32_xsaves64: + intrinsicName = "x86.xsaves64"; + break; + case X86::BI__builtin_ia32_xsetbv: + case X86::BI_xsetbv: + intrinsicName = "x86.xsetbv"; + break; + } + + // The xsave family of instructions take a 64-bit mask that specifies + // which processor state components to save/restore. The hardware expects + // this mask split into two 32-bit registers: EDX (high 32 bits) and + // EAX (low 32 bits). + mlir::Type i32Ty = builder.getSInt32Ty(); + + // Mhi = (uint32_t)(ops[1] >> 32) - extract high 32 bits via right shift + cir::ConstantOp shift32 = builder.getSInt64(32, loc); + mlir::Value mhi = builder.createShift(loc, ops[1], shift32.getResult(), + /*isShiftLeft=*/false); + mhi = builder.createIntCast(mhi, i32Ty); + + // Mlo = (uint32_t)ops[1] - extract low 32 bits by truncation + mlir::Value mlo = builder.createIntCast(ops[1], i32Ty); + + return emitIntrinsicCallOp(builder, loc, intrinsicName, voidTy, + mlir::ValueRange{ops[0], mhi, mlo}); + } case X86::BI__builtin_ia32_xgetbv: case X86::BI_xgetbv: + // xgetbv reads the extended control register specified by ops[0] (ECX) + // and returns the 64-bit value + return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), + "x86.xgetbv", builder.getUInt64Ty(), ops[0]); case X86::BI__builtin_ia32_storedqudi128_mask: case X86::BI__builtin_ia32_storedqusi128_mask: case X86::BI__builtin_ia32_storedquhi128_mask: @@ -160,13 +674,9 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_vcvtuw2ph512_mask: case X86::BI__builtin_ia32_vcvtudq2ph512_mask: case X86::BI__builtin_ia32_vcvtuqq2ph512_mask: - case X86::BI__builtin_ia32_vfmaddss3: - case X86::BI__builtin_ia32_vfmaddsd3: case X86::BI__builtin_ia32_vfmaddsh3_mask: case X86::BI__builtin_ia32_vfmaddss3_mask: case X86::BI__builtin_ia32_vfmaddsd3_mask: - case X86::BI__builtin_ia32_vfmaddss: - case X86::BI__builtin_ia32_vfmaddsd: case X86::BI__builtin_ia32_vfmaddsh3_maskz: case X86::BI__builtin_ia32_vfmaddss3_maskz: case X86::BI__builtin_ia32_vfmaddsd3_maskz: @@ -282,6 +792,10 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_compressstoreqi128_mask: case X86::BI__builtin_ia32_compressstoreqi256_mask: case X86::BI__builtin_ia32_compressstoreqi512_mask: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; case X86::BI__builtin_ia32_expanddf128_mask: case X86::BI__builtin_ia32_expanddf256_mask: case X86::BI__builtin_ia32_expanddf512_mask: @@ -299,7 +813,11 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
case X86::BI__builtin_ia32_expandhi512_mask: case X86::BI__builtin_ia32_expandqi128_mask: case X86::BI__builtin_ia32_expandqi256_mask: - case X86::BI__builtin_ia32_expandqi512_mask: + case X86::BI__builtin_ia32_expandqi512_mask: { + mlir::Location loc = getLoc(expr->getExprLoc()); + return emitX86CompressExpand(builder, loc, ops[0], ops[1], ops[2], + "x86.avx512.mask.expand"); + } case X86::BI__builtin_ia32_compressdf128_mask: case X86::BI__builtin_ia32_compressdf256_mask: case X86::BI__builtin_ia32_compressdf512_mask: @@ -317,7 +835,11 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_compresshi512_mask: case X86::BI__builtin_ia32_compressqi128_mask: case X86::BI__builtin_ia32_compressqi256_mask: - case X86::BI__builtin_ia32_compressqi512_mask: + case X86::BI__builtin_ia32_compressqi512_mask: { + mlir::Location loc = getLoc(expr->getExprLoc()); + return emitX86CompressExpand(builder, loc, ops[0], ops[1], ops[2], + "x86.avx512.mask.compress"); + } case X86::BI__builtin_ia32_gather3div2df: case X86::BI__builtin_ia32_gather3div2di: case X86::BI__builtin_ia32_gather3div4df: @@ -341,7 +863,93 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_gathersiv8di: case X86::BI__builtin_ia32_gathersiv16si: case X86::BI__builtin_ia32_gatherdiv8di: - case X86::BI__builtin_ia32_gatherdiv16si: + case X86::BI__builtin_ia32_gatherdiv16si: { + StringRef intrinsicName; + switch (builtinID) { + default: + llvm_unreachable("Unexpected builtin"); + case X86::BI__builtin_ia32_gather3div2df: + intrinsicName = "x86.avx512.mask.gather3div2.df"; + break; + case X86::BI__builtin_ia32_gather3div2di: + intrinsicName = "x86.avx512.mask.gather3div2.di"; + break; + case X86::BI__builtin_ia32_gather3div4df: + intrinsicName = "x86.avx512.mask.gather3div4.df"; + break; + case X86::BI__builtin_ia32_gather3div4di: + intrinsicName = "x86.avx512.mask.gather3div4.di"; + break; + case X86::BI__builtin_ia32_gather3div4sf: + intrinsicName = "x86.avx512.mask.gather3div4.sf"; + break; + case X86::BI__builtin_ia32_gather3div4si: + intrinsicName = "x86.avx512.mask.gather3div4.si"; + break; + case X86::BI__builtin_ia32_gather3div8sf: + intrinsicName = "x86.avx512.mask.gather3div8.sf"; + break; + case X86::BI__builtin_ia32_gather3div8si: + intrinsicName = "x86.avx512.mask.gather3div8.si"; + break; + case X86::BI__builtin_ia32_gather3siv2df: + intrinsicName = "x86.avx512.mask.gather3siv2.df"; + break; + case X86::BI__builtin_ia32_gather3siv2di: + intrinsicName = "x86.avx512.mask.gather3siv2.di"; + break; + case X86::BI__builtin_ia32_gather3siv4df: + intrinsicName = "x86.avx512.mask.gather3siv4.df"; + break; + case X86::BI__builtin_ia32_gather3siv4di: + intrinsicName = "x86.avx512.mask.gather3siv4.di"; + break; + case X86::BI__builtin_ia32_gather3siv4sf: + intrinsicName = "x86.avx512.mask.gather3siv4.sf"; + break; + case X86::BI__builtin_ia32_gather3siv4si: + intrinsicName = "x86.avx512.mask.gather3siv4.si"; + break; + case X86::BI__builtin_ia32_gather3siv8sf: + intrinsicName = "x86.avx512.mask.gather3siv8.sf"; + break; + case X86::BI__builtin_ia32_gather3siv8si: + intrinsicName = "x86.avx512.mask.gather3siv8.si"; + break; + case X86::BI__builtin_ia32_gathersiv8df: + intrinsicName = "x86.avx512.mask.gather.dpd.512"; + break; + case X86::BI__builtin_ia32_gathersiv16sf: + intrinsicName = "x86.avx512.mask.gather.dps.512"; + break; + case X86::BI__builtin_ia32_gatherdiv8df: + intrinsicName = "x86.avx512.mask.gather.qpd.512"; + break; + case 
X86::BI__builtin_ia32_gatherdiv16sf: + intrinsicName = "x86.avx512.mask.gather.qps.512"; + break; + case X86::BI__builtin_ia32_gathersiv8di: + intrinsicName = "x86.avx512.mask.gather.dpq.512"; + break; + case X86::BI__builtin_ia32_gathersiv16si: + intrinsicName = "x86.avx512.mask.gather.dpi.512"; + break; + case X86::BI__builtin_ia32_gatherdiv8di: + intrinsicName = "x86.avx512.mask.gather.qpq.512"; + break; + case X86::BI__builtin_ia32_gatherdiv16si: + intrinsicName = "x86.avx512.mask.gather.qpi.512"; + break; + } + + mlir::Location loc = getLoc(expr->getExprLoc()); + unsigned minElts = + std::min(cast<cir::VectorType>(ops[0].getType()).getSize(), + cast<cir::VectorType>(ops[2].getType()).getSize()); + ops[3] = getMaskVecValue(builder, loc, ops[3], minElts); + return emitIntrinsicCallOp(builder, loc, intrinsicName, + convertType(expr->getType()), ops); + } case X86::BI__builtin_ia32_scattersiv8df: case X86::BI__builtin_ia32_scattersiv16sf: case X86::BI__builtin_ia32_scatterdiv8df: @@ -365,7 +973,94 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_scattersiv4sf: case X86::BI__builtin_ia32_scattersiv4si: case X86::BI__builtin_ia32_scattersiv8sf: - case X86::BI__builtin_ia32_scattersiv8si: + case X86::BI__builtin_ia32_scattersiv8si: { + llvm::StringRef intrinsicName; + switch (builtinID) { + default: + llvm_unreachable("Unexpected builtin"); + case X86::BI__builtin_ia32_scattersiv8df: + intrinsicName = "x86.avx512.mask.scatter.dpd.512"; + break; + case X86::BI__builtin_ia32_scattersiv16sf: + intrinsicName = "x86.avx512.mask.scatter.dps.512"; + break; + case X86::BI__builtin_ia32_scatterdiv8df: + intrinsicName = "x86.avx512.mask.scatter.qpd.512"; + break; + case X86::BI__builtin_ia32_scatterdiv16sf: + intrinsicName = "x86.avx512.mask.scatter.qps.512"; + break; + case X86::BI__builtin_ia32_scattersiv8di: + intrinsicName = "x86.avx512.mask.scatter.dpq.512"; + break; + case X86::BI__builtin_ia32_scattersiv16si: + intrinsicName = "x86.avx512.mask.scatter.dpi.512"; + break; + case X86::BI__builtin_ia32_scatterdiv8di: + intrinsicName = "x86.avx512.mask.scatter.qpq.512"; + break; + case X86::BI__builtin_ia32_scatterdiv16si: + intrinsicName = "x86.avx512.mask.scatter.qpi.512"; + break; + case X86::BI__builtin_ia32_scatterdiv2df: + intrinsicName = "x86.avx512.mask.scatterdiv2.df"; + break; + case X86::BI__builtin_ia32_scatterdiv2di: + intrinsicName = "x86.avx512.mask.scatterdiv2.di"; + break; + case X86::BI__builtin_ia32_scatterdiv4df: + intrinsicName = "x86.avx512.mask.scatterdiv4.df"; + break; + case X86::BI__builtin_ia32_scatterdiv4di: + intrinsicName = "x86.avx512.mask.scatterdiv4.di"; + break; + case X86::BI__builtin_ia32_scatterdiv4sf: + intrinsicName = "x86.avx512.mask.scatterdiv4.sf"; + break; + case X86::BI__builtin_ia32_scatterdiv4si: + intrinsicName = "x86.avx512.mask.scatterdiv4.si"; + break; + case X86::BI__builtin_ia32_scatterdiv8sf: + intrinsicName = "x86.avx512.mask.scatterdiv8.sf"; + break; + case X86::BI__builtin_ia32_scatterdiv8si: + intrinsicName = "x86.avx512.mask.scatterdiv8.si"; + break; + case X86::BI__builtin_ia32_scattersiv2df: + intrinsicName = "x86.avx512.mask.scattersiv2.df"; + break; + case X86::BI__builtin_ia32_scattersiv2di: + intrinsicName = "x86.avx512.mask.scattersiv2.di"; + break; + case X86::BI__builtin_ia32_scattersiv4df: + intrinsicName = "x86.avx512.mask.scattersiv4.df"; + break; + case X86::BI__builtin_ia32_scattersiv4di: + intrinsicName = "x86.avx512.mask.scattersiv4.di"; + break; + case 
X86::BI__builtin_ia32_scattersiv4sf: + intrinsicName = "x86.avx512.mask.scattersiv4.sf"; + break; + case X86::BI__builtin_ia32_scattersiv4si: + intrinsicName = "x86.avx512.mask.scattersiv4.si"; + break; + case X86::BI__builtin_ia32_scattersiv8sf: + intrinsicName = "x86.avx512.mask.scattersiv8.sf"; + break; + case X86::BI__builtin_ia32_scattersiv8si: + intrinsicName = "x86.avx512.mask.scattersiv8.si"; + break; + } + + mlir::Location loc = getLoc(expr->getExprLoc()); + unsigned minElts = + std::min(cast<cir::VectorType>(ops[2].getType()).getSize(), + cast<cir::VectorType>(ops[3].getType()).getSize()); + ops[1] = getMaskVecValue(builder, loc, ops[1], minElts); + + return emitIntrinsicCallOp(builder, loc, intrinsicName, + convertType(expr->getType()), ops); + } case X86::BI__builtin_ia32_vextractf128_pd256: case X86::BI__builtin_ia32_vextractf128_ps256: case X86::BI__builtin_ia32_vextractf128_si256: @@ -408,12 +1103,20 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_pblendw256: case X86::BI__builtin_ia32_pblendd128: case X86::BI__builtin_ia32_pblendd256: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; case X86::BI__builtin_ia32_pshuflw: case X86::BI__builtin_ia32_pshuflw256: case X86::BI__builtin_ia32_pshuflw512: + return emitPshufWord(builder, ops[0], ops[1], getLoc(expr->getExprLoc()), + true); case X86::BI__builtin_ia32_pshufhw: case X86::BI__builtin_ia32_pshufhw256: case X86::BI__builtin_ia32_pshufhw512: + return emitPshufWord(builder, ops[0], ops[1], getLoc(expr->getExprLoc()), + false); case X86::BI__builtin_ia32_pshufd: case X86::BI__builtin_ia32_pshufd256: case X86::BI__builtin_ia32_pshufd512: @@ -422,13 +1125,28 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_vpermilpd256: case X86::BI__builtin_ia32_vpermilps256: case X86::BI__builtin_ia32_vpermilpd512: - case X86::BI__builtin_ia32_vpermilps512: + case X86::BI__builtin_ia32_vpermilps512: { + const uint32_t imm = getSExtIntValueFromConstOp(ops[1]); + + llvm::SmallVector<int64_t, 16> mask(16); + computeFullLaneShuffleMask(*this, ops[0], imm, false, mask); + + return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], mask); + } case X86::BI__builtin_ia32_shufpd: case X86::BI__builtin_ia32_shufpd256: case X86::BI__builtin_ia32_shufpd512: case X86::BI__builtin_ia32_shufps: case X86::BI__builtin_ia32_shufps256: - case X86::BI__builtin_ia32_shufps512: + case X86::BI__builtin_ia32_shufps512: { + const uint32_t imm = getZExtIntValueFromConstOp(ops[2]); + + llvm::SmallVector<int64_t, 16> mask(16); + computeFullLaneShuffleMask(*this, ops[0], imm, true, mask); + + return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], ops[1], + mask); + } case X86::BI__builtin_ia32_permdi256: case X86::BI__builtin_ia32_permdf256: case X86::BI__builtin_ia32_permdi512: @@ -460,14 +1178,58 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_psrldqi128_byteshift: case X86::BI__builtin_ia32_psrldqi256_byteshift: case X86::BI__builtin_ia32_psrldqi512_byteshift: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; case X86::BI__builtin_ia32_kshiftliqi: case X86::BI__builtin_ia32_kshiftlihi: case X86::BI__builtin_ia32_kshiftlisi: - case X86::BI__builtin_ia32_kshiftlidi: + case 
X86::BI__builtin_ia32_kshiftlidi: { + mlir::Location loc = getLoc(expr->getExprLoc()); + unsigned shiftVal = + ops[1].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() & + 0xff; + unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth(); + + if (shiftVal >= numElems) + return builder.getNullValue(ops[0].getType(), loc); + + mlir::Value in = getMaskVecValue(builder, loc, ops[0], numElems); + + SmallVector<mlir::Attribute, 64> indices; + mlir::Type i32Ty = builder.getSInt32Ty(); + for (auto i : llvm::seq<unsigned>(0, numElems)) + indices.push_back(cir::IntAttr::get(i32Ty, numElems + i - shiftVal)); + + mlir::Value zero = builder.getNullValue(in.getType(), loc); + mlir::Value sv = builder.createVecShuffle(loc, zero, in, indices); + return builder.createBitcast(sv, ops[0].getType()); + } case X86::BI__builtin_ia32_kshiftriqi: case X86::BI__builtin_ia32_kshiftrihi: case X86::BI__builtin_ia32_kshiftrisi: - case X86::BI__builtin_ia32_kshiftridi: + case X86::BI__builtin_ia32_kshiftridi: { + mlir::Location loc = getLoc(expr->getExprLoc()); + unsigned shiftVal = + ops[1].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() & + 0xff; + unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth(); + + if (shiftVal >= numElems) + return builder.getNullValue(ops[0].getType(), loc); + + mlir::Value in = getMaskVecValue(builder, loc, ops[0], numElems); + + SmallVector<mlir::Attribute, 64> indices; + mlir::Type i32Ty = builder.getSInt32Ty(); + for (auto i : llvm::seq<unsigned>(0, numElems)) + indices.push_back(cir::IntAttr::get(i32Ty, i + shiftVal)); + + mlir::Value zero = builder.getNullValue(in.getType(), loc); + mlir::Value sv = builder.createVecShuffle(loc, in, zero, indices); + return builder.createBitcast(sv, ops[0].getType()); + } case X86::BI__builtin_ia32_vprotbi: case X86::BI__builtin_ia32_vprotwi: case X86::BI__builtin_ia32_vprotdi: @@ -478,12 +1240,16 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_prolq128: case X86::BI__builtin_ia32_prolq256: case X86::BI__builtin_ia32_prolq512: + return emitX86FunnelShift(builder, getLoc(expr->getExprLoc()), ops[0], + ops[0], ops[1], false); case X86::BI__builtin_ia32_prord128: case X86::BI__builtin_ia32_prord256: case X86::BI__builtin_ia32_prord512: case X86::BI__builtin_ia32_prorq128: case X86::BI__builtin_ia32_prorq256: case X86::BI__builtin_ia32_prorq512: + return emitX86FunnelShift(builder, getLoc(expr->getExprLoc()), ops[0], + ops[0], ops[1], true); case X86::BI__builtin_ia32_selectb_128: case X86::BI__builtin_ia32_selectb_256: case X86::BI__builtin_ia32_selectb_512: @@ -536,86 +1302,166 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_ucmpq128_mask: case X86::BI__builtin_ia32_ucmpq256_mask: case X86::BI__builtin_ia32_ucmpq512_mask: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; case X86::BI__builtin_ia32_vpcomb: case X86::BI__builtin_ia32_vpcomw: case X86::BI__builtin_ia32_vpcomd: case X86::BI__builtin_ia32_vpcomq: + return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, true); case X86::BI__builtin_ia32_vpcomub: case X86::BI__builtin_ia32_vpcomuw: case X86::BI__builtin_ia32_vpcomud: case X86::BI__builtin_ia32_vpcomuq: + return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, false); case X86::BI__builtin_ia32_kortestcqi: case X86::BI__builtin_ia32_kortestchi: case X86::BI__builtin_ia32_kortestcsi: - 
case X86::BI__builtin_ia32_kortestcdi: + case X86::BI__builtin_ia32_kortestcdi: { + mlir::Location loc = getLoc(expr->getExprLoc()); + cir::IntType ty = cast<cir::IntType>(ops[0].getType()); + mlir::Value allOnesOp = + builder.getConstAPInt(loc, ty, APInt::getAllOnes(ty.getWidth())); + mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops); + mlir::Value cmp = + cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allOnesOp); + return builder.createCast(cir::CastKind::bool_to_int, cmp, + cgm.convertType(expr->getType())); + } case X86::BI__builtin_ia32_kortestzqi: case X86::BI__builtin_ia32_kortestzhi: case X86::BI__builtin_ia32_kortestzsi: - case X86::BI__builtin_ia32_kortestzdi: + case X86::BI__builtin_ia32_kortestzdi: { + mlir::Location loc = getLoc(expr->getExprLoc()); + cir::IntType ty = cast<cir::IntType>(ops[0].getType()); + mlir::Value allZerosOp = builder.getNullValue(ty, loc).getResult(); + mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops); + mlir::Value cmp = + cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allZerosOp); + return builder.createCast(cir::CastKind::bool_to_int, cmp, + cgm.convertType(expr->getType())); + } case X86::BI__builtin_ia32_ktestcqi: + return emitX86MaskTest(builder, getLoc(expr->getExprLoc()), + "x86.avx512.ktestc.b", ops); case X86::BI__builtin_ia32_ktestzqi: + return emitX86MaskTest(builder, getLoc(expr->getExprLoc()), + "x86.avx512.ktestz.b", ops); case X86::BI__builtin_ia32_ktestchi: + return emitX86MaskTest(builder, getLoc(expr->getExprLoc()), + "x86.avx512.ktestc.w", ops); case X86::BI__builtin_ia32_ktestzhi: + return emitX86MaskTest(builder, getLoc(expr->getExprLoc()), + "x86.avx512.ktestz.w", ops); case X86::BI__builtin_ia32_ktestcsi: + return emitX86MaskTest(builder, getLoc(expr->getExprLoc()), + "x86.avx512.ktestc.d", ops); case X86::BI__builtin_ia32_ktestzsi: + return emitX86MaskTest(builder, getLoc(expr->getExprLoc()), + "x86.avx512.ktestz.d", ops); case X86::BI__builtin_ia32_ktestcdi: + return emitX86MaskTest(builder, getLoc(expr->getExprLoc()), + "x86.avx512.ktestc.q", ops); case X86::BI__builtin_ia32_ktestzdi: + return emitX86MaskTest(builder, getLoc(expr->getExprLoc()), + "x86.avx512.ktestz.q", ops); case X86::BI__builtin_ia32_kaddqi: + return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()), + "x86.avx512.kadd.b", ops); case X86::BI__builtin_ia32_kaddhi: + return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()), + "x86.avx512.kadd.w", ops); case X86::BI__builtin_ia32_kaddsi: + return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()), + "x86.avx512.kadd.d", ops); case X86::BI__builtin_ia32_kadddi: + return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()), + "x86.avx512.kadd.q", ops); case X86::BI__builtin_ia32_kandqi: case X86::BI__builtin_ia32_kandhi: case X86::BI__builtin_ia32_kandsi: case X86::BI__builtin_ia32_kanddi: + return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()), + cir::BinOpKind::And, ops); case X86::BI__builtin_ia32_kandnqi: case X86::BI__builtin_ia32_kandnhi: case X86::BI__builtin_ia32_kandnsi: case X86::BI__builtin_ia32_kandndi: + return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()), + cir::BinOpKind::And, ops, true); case X86::BI__builtin_ia32_korqi: case X86::BI__builtin_ia32_korhi: case X86::BI__builtin_ia32_korsi: case X86::BI__builtin_ia32_kordi: + return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()), + cir::BinOpKind::Or, ops); case X86::BI__builtin_ia32_kxnorqi: case X86::BI__builtin_ia32_kxnorhi: case 
X86::BI__builtin_ia32_kxnorsi: case X86::BI__builtin_ia32_kxnordi: + return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()), + cir::BinOpKind::Xor, ops, true); case X86::BI__builtin_ia32_kxorqi: case X86::BI__builtin_ia32_kxorhi: case X86::BI__builtin_ia32_kxorsi: case X86::BI__builtin_ia32_kxordi: + return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()), + cir::BinOpKind::Xor, ops); case X86::BI__builtin_ia32_knotqi: case X86::BI__builtin_ia32_knothi: case X86::BI__builtin_ia32_knotsi: - case X86::BI__builtin_ia32_knotdi: + case X86::BI__builtin_ia32_knotdi: { + cir::IntType intTy = cast<cir::IntType>(ops[0].getType()); + unsigned numElts = intTy.getWidth(); + mlir::Value resVec = + getMaskVecValue(builder, getLoc(expr->getExprLoc()), ops[0], numElts); + return builder.createBitcast(builder.createNot(resVec), ops[0].getType()); + } case X86::BI__builtin_ia32_kmovb: case X86::BI__builtin_ia32_kmovw: case X86::BI__builtin_ia32_kmovd: - case X86::BI__builtin_ia32_kmovq: - case X86::BI__builtin_ia32_kunpckdi: - case X86::BI__builtin_ia32_kunpcksi: - case X86::BI__builtin_ia32_kunpckhi: + case X86::BI__builtin_ia32_kmovq: { + // Bitcast to vXi1 type and then back to integer. This gets the mask + // register type into the IR, but might be optimized out depending on + // what's around it. + cir::IntType intTy = cast<cir::IntType>(ops[0].getType()); + unsigned numElts = intTy.getWidth(); + mlir::Value resVec = + getMaskVecValue(builder, getLoc(expr->getExprLoc()), ops[0], numElts); + return builder.createBitcast(resVec, ops[0].getType()); + } case X86::BI__builtin_ia32_sqrtsh_round_mask: case X86::BI__builtin_ia32_sqrtsd_round_mask: case X86::BI__builtin_ia32_sqrtss_round_mask: - case X86::BI__builtin_ia32_sqrtpd256: - case X86::BI__builtin_ia32_sqrtpd: - case X86::BI__builtin_ia32_sqrtps256: - case X86::BI__builtin_ia32_sqrtps: - case X86::BI__builtin_ia32_sqrtph256: - case X86::BI__builtin_ia32_sqrtph: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; case X86::BI__builtin_ia32_sqrtph512: - case X86::BI__builtin_ia32_vsqrtbf16256: - case X86::BI__builtin_ia32_vsqrtbf16: - case X86::BI__builtin_ia32_vsqrtbf16512: case X86::BI__builtin_ia32_sqrtps512: - case X86::BI__builtin_ia32_sqrtpd512: + case X86::BI__builtin_ia32_sqrtpd512: { + mlir::Location loc = getLoc(expr->getExprLoc()); + mlir::Value arg = ops[0]; + return cir::SqrtOp::create(builder, loc, arg.getType(), arg).getResult(); + } case X86::BI__builtin_ia32_pmuludq128: case X86::BI__builtin_ia32_pmuludq256: - case X86::BI__builtin_ia32_pmuludq512: + case X86::BI__builtin_ia32_pmuludq512: { + unsigned opTypePrimitiveSizeInBits = + cgm.getDataLayout().getTypeSizeInBits(ops[0].getType()); + return emitX86Muldq(builder, getLoc(expr->getExprLoc()), /*isSigned*/ false, + ops, opTypePrimitiveSizeInBits); + } case X86::BI__builtin_ia32_pmuldq128: case X86::BI__builtin_ia32_pmuldq256: - case X86::BI__builtin_ia32_pmuldq512: + case X86::BI__builtin_ia32_pmuldq512: { + unsigned opTypePrimitiveSizeInBits = + cgm.getDataLayout().getTypeSizeInBits(ops[0].getType()); + return emitX86Muldq(builder, getLoc(expr->getExprLoc()), /*isSigned*/ true, + ops, opTypePrimitiveSizeInBits); + } case X86::BI__builtin_ia32_pternlogd512_mask: case X86::BI__builtin_ia32_pternlogq512_mask: case X86::BI__builtin_ia32_pternlogd128_mask: @@ -710,10 +1556,18 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_cmpunordpd: 
case X86::BI__builtin_ia32_cmpneqps: case X86::BI__builtin_ia32_cmpneqpd: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; case X86::BI__builtin_ia32_cmpnltps: case X86::BI__builtin_ia32_cmpnltpd: + return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()), + cir::CmpOpKind::lt, /*shouldInvert=*/true); case X86::BI__builtin_ia32_cmpnleps: case X86::BI__builtin_ia32_cmpnlepd: + return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()), + cir::CmpOpKind::le, /*shouldInvert=*/true); case X86::BI__builtin_ia32_cmpordps: case X86::BI__builtin_ia32_cmpordpd: case X86::BI__builtin_ia32_cmpph128_mask: @@ -752,7 +1606,6 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_vcvtph2ps256_mask: case X86::BI__builtin_ia32_vcvtph2ps512_mask: case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: - case X86::BI__builtin_ia32_cvtsbf162ss_32: case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: case X86::BI__cpuid: @@ -798,7 +1651,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3: case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: case X86::BI__builtin_ia32_prefetchi: - cgm.errorNYI(e->getSourceRange(), + cgm.errorNYI(expr->getSourceRange(), std::string("unimplemented X86 builtin call: ") + getContext().BuiltinInfo.getName(builtinID)); return {}; diff --git a/clang/lib/CIR/CodeGen/CIRGenCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenCXX.cpp index a3e2081..71568ec 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCXX.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCXX.cpp @@ -135,15 +135,27 @@ static void emitDeclDestroy(CIRGenFunction &cgf, const VarDecl *vd, // call right here. auto gd = GlobalDecl(dtor, Dtor_Complete); fnOp = cgm.getAddrAndTypeOfCXXStructor(gd).second; - cgf.getBuilder().createCallOp( - cgf.getLoc(vd->getSourceRange()), - mlir::FlatSymbolRefAttr::get(fnOp.getSymNameAttr()), - mlir::ValueRange{cgm.getAddrOfGlobalVar(vd)}); + builder.createCallOp(cgf.getLoc(vd->getSourceRange()), + mlir::FlatSymbolRefAttr::get(fnOp.getSymNameAttr()), + mlir::ValueRange{cgm.getAddrOfGlobalVar(vd)}); + assert(fnOp && "expected cir.func"); + // TODO(cir): This doesn't do anything but check for unhandled conditions. + // What it is meant to do should really be happening in LoweringPrepare. + cgm.getCXXABI().registerGlobalDtor(vd, fnOp, nullptr); } else { - cgm.errorNYI(vd->getSourceRange(), "array destructor"); + // Otherwise, a custom destroyer is needed. Classic codegen creates a helper + // function here and emits the destroy into the helper function, which is + // called from __cxa_atexit. + // In CIR, we just emit the destroy into the dtor region. It will be moved + // into a separate function during the LoweringPrepare pass. + // FIXME(cir): We should create a new operation here to explicitly get the + // address of the global into whose dtor region we are emitting the destroy. + // The same applies to code above where it is calling getAddrOfGlobalVar.
+ mlir::Value globalVal = builder.createGetGlobal(addr); + CharUnits alignment = cgf.getContext().getDeclAlign(vd); + Address globalAddr{globalVal, cgf.convertTypeForMem(type), alignment}; + cgf.emitDestroy(globalAddr, type, cgf.getDestroyer(dtorKind)); } - assert(fnOp && "expected cir.func"); - cgm.getCXXABI().registerGlobalDtor(vd, fnOp, nullptr); builder.setInsertionPointToEnd(block); if (block->empty()) { diff --git a/clang/lib/CIR/CodeGen/CIRGenCXXABI.h b/clang/lib/CIR/CodeGen/CIRGenCXXABI.h index 13dc9f3..57b1a1f 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCXXABI.h +++ b/clang/lib/CIR/CodeGen/CIRGenCXXABI.h @@ -192,6 +192,9 @@ public: QualType elementType, const CXXDestructorDecl *dtor) = 0; + virtual size_t getSrcArgforCopyCtor(const CXXConstructorDecl *, + FunctionArgList &args) const = 0; + /// Checks if ABI requires extra virtual offset for vtable field. virtual bool isVirtualOffsetNeededForVTableField(CIRGenFunction &cgf, diff --git a/clang/lib/CIR/CodeGen/CIRGenCall.cpp b/clang/lib/CIR/CodeGen/CIRGenCall.cpp index 50d4c03..17f0c6d 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCall.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCall.cpp @@ -465,12 +465,47 @@ static cir::CIRCallOpInterface emitCallLikeOp(CIRGenFunction &cgf, mlir::Location callLoc, cir::FuncType indirectFuncTy, mlir::Value indirectFuncVal, cir::FuncOp directFuncOp, - const SmallVectorImpl<mlir::Value> &cirCallArgs, + const SmallVectorImpl<mlir::Value> &cirCallArgs, bool isInvoke, const mlir::NamedAttrList &attrs) { CIRGenBuilderTy &builder = cgf.getBuilder(); assert(!cir::MissingFeatures::opCallSurroundingTry()); - assert(!cir::MissingFeatures::invokeOp()); + + if (isInvoke) { + // This call may throw and requires catch and/or cleanup handling. + // If this call does not appear within the `try` region of an existing + // TryOp, we must create a synthetic TryOp to contain the call. This + // happens when a call that may throw appears within a cleanup + // scope. + + // In OG, we build the landing pad for this scope. In CIR, we emit a + // synthetic cir.try because this didn't come from code generating from a + // try/catch in C++. 
+ assert(cgf.curLexScope && "expected scope"); + cir::TryOp tryOp = cgf.curLexScope->getClosestTryParent(); + if (!tryOp) { + cgf.cgm.errorNYI( + "emitCallLikeOp: call does not have an associated cir.try"); + return {}; + } + + if (tryOp.getSynthetic()) { + cgf.cgm.errorNYI("emitCallLikeOp: tryOp synthetic"); + return {}; + } + + cir::CallOp callOpWithExceptions; + if (indirectFuncTy) { + cgf.cgm.errorNYI("emitCallLikeOp: indirect function type"); + return {}; + } + + callOpWithExceptions = + builder.createCallOp(callLoc, directFuncOp, cirCallArgs); + + cgf.populateCatchHandlersIfRequired(tryOp); + return callOpWithExceptions; + } assert(builder.getInsertionBlock() && "expected valid basic block"); @@ -601,8 +636,6 @@ RValue CIRGenFunction::emitCall(const CIRGenFunctionInfo &funcInfo, assert(!cir::MissingFeatures::opCallAttrs()); cgm.constructAttributeList(callee.getAbstractInfo(), attrs); - assert(!cir::MissingFeatures::invokeOp()); - cir::FuncType indirectFuncTy; mlir::Value indirectFuncVal; cir::FuncOp directFuncOp; @@ -628,10 +661,17 @@ RValue CIRGenFunction::emitCall(const CIRGenFunctionInfo &funcInfo, indirectFuncVal = calleePtr->getResult(0); } + assert(!cir::MissingFeatures::msvcCXXPersonality()); + assert(!cir::MissingFeatures::functionUsesSEHTry()); + assert(!cir::MissingFeatures::nothrowAttr()); + + bool cannotThrow = attrs.getNamed("nothrow").has_value(); + bool isInvoke = !cannotThrow && isCatchOrCleanupRequired(); + mlir::Location callLoc = loc; cir::CIRCallOpInterface theCall = emitCallLikeOp(*this, loc, indirectFuncTy, indirectFuncVal, directFuncOp, - cirCallArgs, attrs); + cirCallArgs, isInvoke, attrs); if (callOp) *callOp = theCall; diff --git a/clang/lib/CIR/CodeGen/CIRGenClass.cpp b/clang/lib/CIR/CodeGen/CIRGenClass.cpp index a829678..2a26e38 100644 --- a/clang/lib/CIR/CodeGen/CIRGenClass.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenClass.cpp @@ -18,6 +18,7 @@ #include "clang/AST/ExprCXX.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/Type.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" #include "clang/CIR/MissingFeatures.h" using namespace clang; @@ -110,8 +111,23 @@ static void emitMemberInitializer(CIRGenFunction &cgf, // NOTE(cir): CodeGen allows record types to be memcpy'd if applicable, // whereas ClangIR wants to represent all object construction explicitly. if (!baseElementTy->isRecordType()) { - cgf.cgm.errorNYI(memberInit->getSourceRange(), - "emitMemberInitializer: array of non-record type"); + unsigned srcArgIndex = + cgf.cgm.getCXXABI().getSrcArgforCopyCtor(constructor, args); + cir::LoadOp srcPtr = cgf.getBuilder().createLoad( + cgf.getLoc(memberInit->getSourceLocation()), + cgf.getAddrOfLocalVar(args[srcArgIndex])); + LValue thisRhslv = cgf.makeNaturalAlignAddrLValue(srcPtr, recordTy); + LValue src = cgf.emitLValueForFieldInitialization(thisRhslv, field, + field->getName()); + + // Copy the aggregate. + cgf.emitAggregateCopy(lhs, src, fieldType, + cgf.getOverlapForFieldInit(field), + lhs.isVolatileQualified()); + // Ensure that we destroy the objects if an exception is thrown later in + // the constructor. 
+ assert(!cgf.needsEHCleanup(fieldType.isDestructedType()) && + "Arrays of non-record types shouldn't need EH cleanup"); return; } } @@ -133,7 +149,7 @@ struct CallBaseDtor final : EHScopeStack::Cleanup { CallBaseDtor(const CXXRecordDecl *base, bool baseIsVirtual) : baseClass(base), baseIsVirtual(baseIsVirtual) {} - void emit(CIRGenFunction &cgf) override { + void emit(CIRGenFunction &cgf, Flags flags) override { const CXXRecordDecl *derivedClass = cast<CXXMethodDecl>(cgf.curFuncDecl)->getParent(); @@ -786,6 +802,8 @@ void CIRGenFunction::emitImplicitAssignmentOperatorBody(FunctionArgList &args) { "Body of an implicit assignment operator should be compound stmt."); const auto *rootCS = cast<CompoundStmt>(rootS); + cgm.setCXXSpecialMemberAttr(cast<cir::FuncOp>(curFn), assignOp); + assert(!cir::MissingFeatures::incrementProfileCounter()); assert(!cir::MissingFeatures::runCleanupsScope()); @@ -906,7 +924,7 @@ mlir::Value loadThisForDtorDelete(CIRGenFunction &cgf, struct CallDtorDelete final : EHScopeStack::Cleanup { CallDtorDelete() {} - void emit(CIRGenFunction &cgf) override { + void emit(CIRGenFunction &cgf, Flags flags) override { const CXXDestructorDecl *dtor = cast<CXXDestructorDecl>(cgf.curFuncDecl); const CXXRecordDecl *classDecl = dtor->getParent(); cgf.emitDeleteCall(dtor->getOperatorDelete(), @@ -923,7 +941,7 @@ public: DestroyField(const FieldDecl *field, CIRGenFunction::Destroyer *destroyer) : field(field), destroyer(destroyer) {} - void emit(CIRGenFunction &cgf) override { + void emit(CIRGenFunction &cgf, Flags flags) override { // Find the address of the field. Address thisValue = cgf.loadCXXThisAddress(); CanQualType recordTy = @@ -932,7 +950,7 @@ public: LValue lv = cgf.emitLValueForField(thisLV, field); assert(lv.isSimple()); - assert(!cir::MissingFeatures::ehCleanupFlags()); + assert(!cir::MissingFeatures::useEHCleanupForArray()); cgf.emitDestroy(lv.getAddress(), field->getType(), destroyer); } }; @@ -1029,7 +1047,7 @@ void CIRGenFunction::enterDtorCleanups(const CXXDestructorDecl *dd, continue; CleanupKind cleanupKind = getCleanupKind(dtorKind); - assert(!cir::MissingFeatures::ehCleanupFlags()); + assert(!cir::MissingFeatures::useEHCleanupForArray()); ehStack.pushCleanup<DestroyField>(cleanupKind, field, getDestroyer(dtorKind)); } @@ -1110,6 +1128,25 @@ mlir::Value CIRGenFunction::getVTTParameter(GlobalDecl gd, bool forVirtualBase, } } +Address CIRGenFunction::getAddressOfDerivedClass( + mlir::Location loc, Address baseAddr, const CXXRecordDecl *derived, + llvm::iterator_range<CastExpr::path_const_iterator> path, + bool nullCheckValue) { + assert(!path.empty() && "Base path should not be empty!"); + + QualType derivedTy = getContext().getCanonicalTagType(derived); + mlir::Type derivedValueTy = convertType(derivedTy); + CharUnits nonVirtualOffset = + cgm.computeNonVirtualBaseClassOffset(derived, path); + + // Note that in OG, no offset (nonVirtualOffset.getQuantity() == 0) means it + // just gives the address back. In CIR a `cir.derived_class` is created and + // made into a nop later on during lowering. 
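For reference, getAddressOfDerivedClass corresponds to a base-to-derived address adjustment such as the cast below (an assumed example, not taken from this patch):

    struct Base { int b; };
    struct Derived : Base { int d; };
    Derived *downcast(Base *p) {
      // cir.derived_class computes the derived address; with a zero
      // non-virtual offset it becomes a no-op during lowering.
      return static_cast<Derived *>(p);
    }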
+ return builder.createDerivedClassAddr(loc, baseAddr, derivedValueTy, + nonVirtualOffset.getQuantity(), + /*assumeNotNull=*/!nullCheckValue); +} + Address CIRGenFunction::getAddressOfBaseClass( Address value, const CXXRecordDecl *derived, llvm::iterator_range<CastExpr::path_const_iterator> path, diff --git a/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp b/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp index 851328a..6c6cb40 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp @@ -147,8 +147,8 @@ void *EHScopeStack::pushCleanup(CleanupKind kind, size_t size) { assert(!cir::MissingFeatures::innermostEHScope()); - EHCleanupScope *scope = new (buffer) - EHCleanupScope(size, branchFixups.size(), innermostNormalCleanup); + EHCleanupScope *scope = new (buffer) EHCleanupScope( + size, branchFixups.size(), innermostNormalCleanup, innermostEHScope); if (isNormalCleanup) innermostNormalCleanup = stable_begin(); @@ -188,18 +188,34 @@ void EHScopeStack::popCleanup() { } } +bool EHScopeStack::requiresCatchOrCleanup() const { + for (stable_iterator si = getInnermostEHScope(); si != stable_end();) { + if (auto *cleanup = dyn_cast<EHCleanupScope>(&*find(si))) { + if (cleanup->isLifetimeMarker()) { + // Skip lifetime markers and continue from the enclosing EH scope + assert(!cir::MissingFeatures::emitLifetimeMarkers()); + continue; + } + } + return true; + } + return false; +} + EHCatchScope *EHScopeStack::pushCatch(unsigned numHandlers) { char *buffer = allocate(EHCatchScope::getSizeForNumHandlers(numHandlers)); - assert(!cir::MissingFeatures::innermostEHScope()); - EHCatchScope *scope = new (buffer) EHCatchScope(numHandlers); + EHCatchScope *scope = + new (buffer) EHCatchScope(numHandlers, innermostEHScope); + innermostEHScope = stable_begin(); return scope; } -static void emitCleanup(CIRGenFunction &cgf, EHScopeStack::Cleanup *cleanup) { +static void emitCleanup(CIRGenFunction &cgf, EHScopeStack::Cleanup *cleanup, + EHScopeStack::Cleanup::Flags flags) { // Ask the cleanup to emit itself. assert(cgf.haveInsertPoint() && "expected insertion point"); - assert(!cir::MissingFeatures::ehCleanupFlags()); - cleanup->emit(cgf); + assert(!cir::MissingFeatures::ehCleanupActiveFlag()); + cleanup->emit(cgf, flags); assert(cgf.haveInsertPoint() && "cleanup ended with no insertion point?"); } @@ -269,7 +285,11 @@ void CIRGenFunction::popCleanupBlock() { reinterpret_cast<EHScopeStack::Cleanup *>(cleanupBufferHeap.get()); } - assert(!cir::MissingFeatures::ehCleanupFlags()); + EHScopeStack::Cleanup::Flags cleanupFlags; + if (scope.isNormalCleanup()) + cleanupFlags.setIsNormalCleanupKind(); + if (scope.isEHCleanup()) + cleanupFlags.setIsEHCleanupKind(); // If we have a fallthrough and no other need for the cleanup, // emit it directly. @@ -277,7 +297,7 @@ void CIRGenFunction::popCleanupBlock() { assert(!cir::MissingFeatures::ehCleanupScopeRequiresEHCleanup()); ehStack.popCleanup(); scope.markEmitted(); - emitCleanup(*this, cleanup); + emitCleanup(*this, cleanup, cleanupFlags); } else { // Otherwise, the best approach is to thread everything through // the cleanup block and then try to clean up after ourselves. @@ -339,7 +359,7 @@ void CIRGenFunction::popCleanupBlock() { ehStack.popCleanup(); assert(ehStack.hasNormalCleanups() == hasEnclosingCleanups); - emitCleanup(*this, cleanup); + emitCleanup(*this, cleanup, cleanupFlags); // Append the prepared cleanup prologue from above. 
assert(!cir::MissingFeatures::cleanupAppendInsts()); diff --git a/clang/lib/CIR/CodeGen/CIRGenCleanup.h b/clang/lib/CIR/CodeGen/CIRGenCleanup.h index 61a09a5..85dbde4 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCleanup.h +++ b/clang/lib/CIR/CodeGen/CIRGenCleanup.h @@ -18,6 +18,7 @@ #include "CIRGenModule.h" #include "EHScopeStack.h" #include "mlir/IR/Value.h" +#include "clang/AST/StmtCXX.h" namespace clang::CIRGen { @@ -30,12 +31,16 @@ struct CatchTypeInfo { /// A protected scope for zero-cost EH handling. class EHScope { + EHScopeStack::stable_iterator enclosingEHScope; + class CommonBitFields { friend class EHScope; unsigned kind : 3; }; enum { NumCommonBits = 3 }; + bool scopeMayThrow; + protected: class CatchBitFields { friend class EHCatchScope; @@ -79,7 +84,10 @@ protected: public: enum Kind { Cleanup, Catch, Terminate, Filter }; - EHScope(Kind kind) { commonBits.kind = kind; } + EHScope(Kind kind, EHScopeStack::stable_iterator enclosingEHScope) + : enclosingEHScope(enclosingEHScope) { + commonBits.kind = kind; + } Kind getKind() const { return static_cast<Kind>(commonBits.kind); } @@ -87,8 +95,13 @@ public: // Traditional LLVM codegen also checks for `!block->use_empty()`, but // in CIRGen the block content is not important, just used as a way to // signal `hasEHBranches`. - assert(!cir::MissingFeatures::ehstackBranches()); - return false; + return scopeMayThrow; + } + + void setMayThrow(bool mayThrow) { scopeMayThrow = mayThrow; } + + EHScopeStack::stable_iterator getEnclosingEHScope() const { + return enclosingEHScope; } }; @@ -111,6 +124,11 @@ public: /// The catch handler for this type. mlir::Region *region; + + /// The catch handler stmt. + const CXXCatchStmt *stmt; + + bool isCatchAll() const { return type.rtti == nullptr; } }; private: @@ -118,22 +136,36 @@ private: Handler *getHandlers() { return reinterpret_cast<Handler *>(this + 1); } + const Handler *getHandlers() const { + return reinterpret_cast<const Handler *>(this + 1); + } + public: static size_t getSizeForNumHandlers(unsigned n) { return sizeof(EHCatchScope) + n * sizeof(Handler); } - EHCatchScope(unsigned numHandlers) : EHScope(Catch) { + EHCatchScope(unsigned numHandlers, + EHScopeStack::stable_iterator enclosingEHScope) + : EHScope(Catch, enclosingEHScope) { catchBits.numHandlers = numHandlers; assert(catchBits.numHandlers == numHandlers && "NumHandlers overflow?"); } unsigned getNumHandlers() const { return catchBits.numHandlers; } - void setHandler(unsigned i, CatchTypeInfo type, mlir::Region *region) { + void setHandler(unsigned i, CatchTypeInfo type, mlir::Region *region, + const CXXCatchStmt *stmt) { assert(i < getNumHandlers()); - getHandlers()[i].type = type; - getHandlers()[i].region = region; + Handler *handler = &getHandlers()[i]; + handler->type = type; + handler->region = region; + handler->stmt = stmt; + } + + const Handler &getHandler(unsigned i) const { + assert(i < getNumHandlers()); + return getHandlers()[i]; } // Clear all handler blocks. @@ -144,6 +176,10 @@ public: // The blocks are owned by TryOp, nothing to delete. 
} + using iterator = const Handler *; + iterator begin() const { return getHandlers(); } + iterator end() const { return getHandlers() + getNumHandlers(); } + static bool classof(const EHScope *scope) { return scope->getKind() == Catch; } @@ -176,9 +212,10 @@ public: } EHCleanupScope(unsigned cleanupSize, unsigned fixupDepth, - EHScopeStack::stable_iterator enclosingNormal) - : EHScope(EHScope::Cleanup), enclosingNormal(enclosingNormal), - fixupDepth(fixupDepth) { + EHScopeStack::stable_iterator enclosingNormal, + EHScopeStack::stable_iterator enclosingEH) + : EHScope(EHScope::Cleanup, enclosingEH), + enclosingNormal(enclosingNormal), fixupDepth(fixupDepth) { // TODO(cir): When exception handling is upstreamed, isNormalCleanup and // isEHCleanup will be arguments to the constructor. cleanupBits.isNormalCleanup = true; @@ -200,10 +237,13 @@ public: void setNormalBlock(mlir::Block *bb) { normalBlock = bb; } bool isNormalCleanup() const { return cleanupBits.isNormalCleanup; } + bool isEHCleanup() const { return cleanupBits.isEHCleanup; } bool isActive() const { return cleanupBits.isActive; } void setActive(bool isActive) { cleanupBits.isActive = isActive; } + bool isLifetimeMarker() const { return cleanupBits.isLifetimeMarker; } + unsigned getFixupDepth() const { return fixupDepth; } EHScopeStack::stable_iterator getEnclosingNormalCleanup() const { return enclosingNormal; @@ -235,13 +275,45 @@ public: EHScope *get() const { return reinterpret_cast<EHScope *>(ptr); } + EHScope *operator->() const { return get(); } EHScope &operator*() const { return *get(); } + + iterator &operator++() { + size_t size; + switch (get()->getKind()) { + case EHScope::Catch: + size = EHCatchScope::getSizeForNumHandlers( + static_cast<const EHCatchScope *>(get())->getNumHandlers()); + break; + + case EHScope::Filter: + llvm_unreachable("EHScopeStack::iterator Filter"); + break; + + case EHScope::Cleanup: + llvm_unreachable("EHScopeStack::iterator Cleanup"); + break; + + case EHScope::Terminate: + llvm_unreachable("EHScopeStack::iterator Terminate"); + break; + } + ptr += llvm::alignTo(size, ScopeStackAlignment); + return *this; + } + + bool operator==(iterator other) const { return ptr == other.ptr; } + bool operator!=(iterator other) const { return ptr != other.ptr; } }; inline EHScopeStack::iterator EHScopeStack::begin() const { return iterator(startOfData); } +inline EHScopeStack::iterator EHScopeStack::end() const { + return iterator(endOfBuffer); +} + inline EHScopeStack::iterator EHScopeStack::find(stable_iterator savePoint) const { assert(savePoint.isValid() && "finding invalid savepoint"); @@ -254,7 +326,7 @@ inline void EHScopeStack::popCatch() { assert(!empty() && "popping exception stack when not empty"); EHCatchScope &scope = llvm::cast<EHCatchScope>(*begin()); - assert(!cir::MissingFeatures::innermostEHScope()); + innermostEHScope = scope.getEnclosingEHScope(); deallocate(EHCatchScope::getSizeForNumHandlers(scope.getNumHandlers())); } diff --git a/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp b/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp index 930ae55..b4f185d 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp @@ -13,6 +13,7 @@ #include "CIRGenFunction.h" #include "mlir/Support/LLVM.h" #include "clang/AST/StmtCXX.h" +#include "clang/AST/StmtVisitor.h" #include "clang/Basic/TargetInfo.h" #include "clang/CIR/Dialect/IR/CIRTypes.h" #include "clang/CIR/MissingFeatures.h" @@ -21,18 +22,93 @@ using namespace clang; using namespace clang::CIRGen; struct 
clang::CIRGen::CGCoroData { + // What is the current await expression kind and how many + // await/yield expressions were encountered so far. + // These are used to generate pretty labels for await expressions in LLVM IR. + cir::AwaitKind currentAwaitKind = cir::AwaitKind::Init; // Stores the __builtin_coro_id emitted in the function so that we can supply // it as the first argument to other builtins. cir::CallOp coroId = nullptr; // Stores the result of __builtin_coro_begin call. mlir::Value coroBegin = nullptr; + + // The promise type's 'unhandled_exception' handler, if it defines one. + Stmt *exceptionHandler = nullptr; }; // Defining these here allows to keep CGCoroData private to this file. CIRGenFunction::CGCoroInfo::CGCoroInfo() {} CIRGenFunction::CGCoroInfo::~CGCoroInfo() {} +namespace { +// FIXME: both GetParamRef and ParamReferenceReplacerRAII are good template +// candidates to be shared among LLVM / CIR codegen. + +// Hunts for the parameter reference in the parameter copy/move declaration. +struct GetParamRef : public StmtVisitor<GetParamRef> { +public: + DeclRefExpr *expr = nullptr; + GetParamRef() {} + void VisitDeclRefExpr(DeclRefExpr *e) { + assert(expr == nullptr && "multilple declref in param move"); + expr = e; + } + void VisitStmt(Stmt *s) { + for (Stmt *c : s->children()) { + if (c) + Visit(c); + } + } +}; + +// This class replaces references to parameters to their copies by changing +// the addresses in CGF.LocalDeclMap and restoring back the original values in +// its destructor. +struct ParamReferenceReplacerRAII { + CIRGenFunction::DeclMapTy savedLocals; + CIRGenFunction::DeclMapTy &localDeclMap; + + ParamReferenceReplacerRAII(CIRGenFunction::DeclMapTy &localDeclMap) + : localDeclMap(localDeclMap) {} + + void addCopy(const DeclStmt *pm) { + // Figure out what param it refers to. + + assert(pm->isSingleDecl()); + const VarDecl *vd = static_cast<const VarDecl *>(pm->getSingleDecl()); + const Expr *initExpr = vd->getInit(); + GetParamRef visitor; + visitor.Visit(const_cast<Expr *>(initExpr)); + assert(visitor.expr); + DeclRefExpr *dreOrig = visitor.expr; + auto *pd = dreOrig->getDecl(); + + auto it = localDeclMap.find(pd); + assert(it != localDeclMap.end() && "parameter is not found"); + savedLocals.insert({pd, it->second}); + + auto copyIt = localDeclMap.find(vd); + assert(copyIt != localDeclMap.end() && "parameter copy is not found"); + it->second = copyIt->getSecond(); + } + + ~ParamReferenceReplacerRAII() { + for (auto &&savedLocal : savedLocals) { + localDeclMap.insert({savedLocal.first, savedLocal.second}); + } + } +}; +} // namespace + +RValue CIRGenFunction::emitCoroutineFrame() { + if (curCoro.data && curCoro.data->coroBegin) { + return RValue::get(curCoro.data->coroBegin); + } + cgm.errorNYI("NYI"); + return RValue(); +} + static void createCoroData(CIRGenFunction &cgf, CIRGenFunction::CGCoroInfo &curCoro, cir::CallOp coroId) { @@ -149,7 +225,203 @@ CIRGenFunction::emitCoroutineBody(const CoroutineBodyStmt &s) { if (s.getReturnStmtOnAllocFailure()) cgm.errorNYI("handle coroutine return alloc failure"); - assert(!cir::MissingFeatures::generateDebugInfo()); - assert(!cir::MissingFeatures::emitBodyAndFallthrough()); + { + assert(!cir::MissingFeatures::generateDebugInfo()); + ParamReferenceReplacerRAII paramReplacer(localDeclMap); + // Create mapping between parameters and copy-params for coroutine + // function. 
+ llvm::ArrayRef<const Stmt *> paramMoves = s.getParamMoves();
+ assert((paramMoves.size() == 0 || (paramMoves.size() == fnArgs.size())) &&
+ "ParamMoves and FnArgs should be the same size for coroutine "
+ "function");
+ // For zipping the arg map into debug info.
+ assert(!cir::MissingFeatures::generateDebugInfo());
+
+ // Create parameter copies. We do it before creating a promise, since an
+ // evolution of coroutine TS may allow promise constructor to observe
+ // parameter copies.
+ assert(!cir::MissingFeatures::coroOutsideFrameMD());
+ for (auto *pm : paramMoves) {
+ if (emitStmt(pm, /*useCurrentScope=*/true).failed())
+ return mlir::failure();
+ paramReplacer.addCopy(cast<DeclStmt>(pm));
+ }
+
+ if (emitStmt(s.getPromiseDeclStmt(), /*useCurrentScope=*/true).failed())
+ return mlir::failure();
+ // returnValue should be valid as long as the coroutine's return type
+ // is not void. The assertion could help us to reduce the check later.
+ assert(returnValue.isValid() == (bool)s.getReturnStmt());
+ // Now we have the promise, initialize the GRO.
+ // We need to emit `get_return_object` first. According to:
+ // [dcl.fct.def.coroutine]p7
+ // The call to get_return_object is sequenced before the call to
+ // initial_suspend and is invoked at most once.
+ //
+ // So we couldn't emit the return value when we emit the return statement,
+ // otherwise the call to get_return_object wouldn't be in front
+ // of initial_suspend.
+ if (returnValue.isValid())
+ emitAnyExprToMem(s.getReturnValue(), returnValue,
+ s.getReturnValue()->getType().getQualifiers(),
+ /*isInit*/ true);
+
+ assert(!cir::MissingFeatures::ehCleanupScope());
+ // FIXME(cir): EHStack.pushCleanup<CallCoroEnd>(EHCleanup);
+ curCoro.data->currentAwaitKind = cir::AwaitKind::Init;
+ if (emitStmt(s.getInitSuspendStmt(), /*useCurrentScope=*/true).failed())
+ return mlir::failure();
+ assert(!cir::MissingFeatures::emitBodyAndFallthrough());
+ }
return mlir::success();
}
+
+static bool memberCallExpressionCanThrow(const Expr *e) {
+ if (const auto *ce = dyn_cast<CXXMemberCallExpr>(e))
+ if (const auto *proto =
+ ce->getMethodDecl()->getType()->getAs<FunctionProtoType>())
+ if (isNoexceptExceptionSpec(proto->getExceptionSpecType()) &&
+ proto->canThrow() == CT_Cannot)
+ return false;
+ return true;
+}
+
+// Given a suspend expression which roughly looks like:
+//
+// auto && x = CommonExpr();
+// if (!x.await_ready()) {
+// x.await_suspend(...); (*)
+// }
+// x.await_resume();
+//
+// where the result of the entire expression is the result of x.await_resume()
+//
+// (*) If x.await_suspend's return type is bool, it allows the suspend to be
+// vetoed:
+// if (x.await_suspend(...))
+// llvm_coro_suspend();
+//
+// This is higher level than LLVM codegen, for that one see llvm's
+// docs/Coroutines.rst for more details.
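A minimal coroutine that exercises the sequence emitted above (parameter copy, promise declaration, get_return_object before the initial suspend). The Task type is an assumed example, not something defined by this patch:

    #include <coroutine>
    struct Task {
      struct promise_type {
        Task get_return_object() { return {}; } // emitted before initial_suspend
        std::suspend_always initial_suspend() { return {}; }
        std::suspend_never final_suspend() noexcept { return {}; }
        void return_void() {}
        void unhandled_exception() {}
      };
    };
    Task example(int byValue) { // byValue gets a param move into the frame
      co_return;
    }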
+namespace { +struct LValueOrRValue { + LValue lv; + RValue rv; +}; +} // namespace + +static LValueOrRValue +emitSuspendExpression(CIRGenFunction &cgf, CGCoroData &coro, + CoroutineSuspendExpr const &s, cir::AwaitKind kind, + AggValueSlot aggSlot, bool ignoreResult, + mlir::Block *scopeParentBlock, + mlir::Value &tmpResumeRValAddr, bool forLValue) { + [[maybe_unused]] mlir::LogicalResult awaitBuild = mlir::success(); + LValueOrRValue awaitRes; + + CIRGenFunction::OpaqueValueMapping binder = + CIRGenFunction::OpaqueValueMapping(cgf, s.getOpaqueValue()); + CIRGenBuilderTy &builder = cgf.getBuilder(); + [[maybe_unused]] cir::AwaitOp awaitOp = cir::AwaitOp::create( + builder, cgf.getLoc(s.getSourceRange()), kind, + /*readyBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + Expr *condExpr = s.getReadyExpr()->IgnoreParens(); + builder.createCondition(cgf.evaluateExprAsBool(condExpr)); + }, + /*suspendBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + // Note that differently from LLVM codegen we do not emit coro.save + // and coro.suspend here, that should be done as part of lowering this + // to LLVM dialect (or some other MLIR dialect) + + // A invalid suspendRet indicates "void returning await_suspend" + mlir::Value suspendRet = cgf.emitScalarExpr(s.getSuspendExpr()); + + // Veto suspension if requested by bool returning await_suspend. + if (suspendRet) { + cgf.cgm.errorNYI("Veto await_suspend"); + } + + // Signals the parent that execution flows to next region. + cir::YieldOp::create(builder, loc); + }, + /*resumeBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + // Exception handling requires additional IR. If the 'await_resume' + // function is marked as 'noexcept', we avoid generating this additional + // IR. + CXXTryStmt *tryStmt = nullptr; + if (coro.exceptionHandler && kind == cir::AwaitKind::Init && + memberCallExpressionCanThrow(s.getResumeExpr())) + cgf.cgm.errorNYI("Coro resume Exception"); + + // FIXME(cir): the alloca for the resume expr should be placed in the + // enclosing cir.scope instead. + if (forLValue) { + assert(!cir::MissingFeatures::coroCoYield()); + } else { + awaitRes.rv = + cgf.emitAnyExpr(s.getResumeExpr(), aggSlot, ignoreResult); + if (!awaitRes.rv.isIgnored()) + // Create the alloca in the block before the scope wrapping + // cir.await. + assert(!cir::MissingFeatures::coroCoReturn()); + } + + if (tryStmt) + cgf.cgm.errorNYI("Coro tryStmt"); + + // Returns control back to parent. + cir::YieldOp::create(builder, loc); + }); + + assert(awaitBuild.succeeded() && "Should know how to codegen"); + return awaitRes; +} + +static RValue emitSuspendExpr(CIRGenFunction &cgf, + const CoroutineSuspendExpr &e, + cir::AwaitKind kind, AggValueSlot aggSlot, + bool ignoreResult) { + RValue rval; + mlir::Location scopeLoc = cgf.getLoc(e.getSourceRange()); + + // Since we model suspend / resume as an inner region, we must store + // resume scalar results in a tmp alloca, and load it after we build the + // suspend expression. An alternative way to do this would be to make + // every region return a value when promise.return_value() is used, but + // it's a bit awkward given that resume is the only region that actually + // returns a value. + mlir::Block *currEntryBlock = cgf.curLexScope->getEntryBlock(); + [[maybe_unused]] mlir::Value tmpResumeRValAddr; + + // No need to explicitly wrap this into a scope since the AST already uses a + // ExprWithCleanups, which will wrap this into a cir.scope anyways. 
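The ready/suspend/resume builders above map one-to-one onto the members of an awaiter; a minimal awaiter for reference (assumed example):

    #include <coroutine>
    struct Awaiter {
      bool await_ready() { return false; }           // ready region
      void await_suspend(std::coroutine_handle<>) {} // suspend region; void => no veto
      int await_resume() { return 42; }              // resume region; value of co_await
    };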
+ rval = emitSuspendExpression(cgf, *cgf.curCoro.data, e, kind, aggSlot, + ignoreResult, currEntryBlock, tmpResumeRValAddr, + /*forLValue*/ false) + .rv; + + if (ignoreResult || rval.isIgnored()) + return rval; + + if (rval.isScalar()) { + rval = RValue::get(cir::LoadOp::create(cgf.getBuilder(), scopeLoc, + rval.getValue().getType(), + tmpResumeRValAddr)); + } else if (rval.isAggregate()) { + // This is probably already handled via AggSlot, remove this assertion + // once we have a testcase and prove all pieces work. + cgf.cgm.errorNYI("emitSuspendExpr Aggregate"); + } else { // complex + cgf.cgm.errorNYI("emitSuspendExpr Complex"); + } + return rval; +} + +RValue CIRGenFunction::emitCoawaitExpr(const CoawaitExpr &e, + AggValueSlot aggSlot, + bool ignoreResult) { + return emitSuspendExpr(*this, e, curCoro.data->currentAwaitKind, aggSlot, + ignoreResult); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp index aeea0ef..12b153a 100644 --- a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp @@ -50,6 +50,41 @@ CIRGenFunction::emitAutoVarAlloca(const VarDecl &d, Address address = Address::invalid(); if (ty->isConstantSizeType()) { + // If this value is an array, struct, or vector with a statically + // determinable constant initializer, there are optimizations we can do. + // + // TODO: We should constant-evaluate the initializer of any variable, + // as long as it is initialized by a constant expression. Currently, + // isConstantInitializer produces wrong answers for structs with + // reference or bitfield members, and a few other cases, and checking + // for POD-ness protects us from some of these. + if (d.getInit() && + (ty->isArrayType() || ty->isRecordType() || ty->isVectorType()) && + (d.isConstexpr() || + ((ty.isPODType(getContext()) || + getContext().getBaseElementType(ty)->isObjCObjectPointerType()) && + d.getInit()->isConstantInitializer(getContext(), false)))) { + + // If the variable's a const type, and it's neither an NRVO + // candidate nor a __block variable and has no mutable members, + // emit it as a global instead. + // Exception is if a variable is located in non-constant address space + // in OpenCL. + // TODO(cir): perhaps we don't need this at all at CIR since this can + // be done as part of lowering down to LLVM. + bool needsDtor = + d.needsDestruction(getContext()) == QualType::DK_cxx_destructor; + if ((!getContext().getLangOpts().OpenCL || + ty.getAddressSpace() == LangAS::opencl_constant) && + (cgm.getCodeGenOpts().MergeAllConstants && !nrvo && + !d.isEscapingByref() && + ty.isConstantStorage(getContext(), true, !needsDtor))) { + cgm.errorNYI(d.getSourceRange(), "emitAutoVarAlloca: type constant"); + } + // Otherwise, tell the initialization code that we're in this case. + emission.isConstantAggregate = true; + } + // A normal fixed sized variable becomes an alloca in the entry block, // unless: // - it's an NRVO variable. @@ -63,8 +98,22 @@ CIRGenFunction::emitAutoVarAlloca(const VarDecl &d, if (const RecordDecl *rd = ty->getAsRecordDecl()) { if (const auto *cxxrd = dyn_cast<CXXRecordDecl>(rd); (cxxrd && !cxxrd->hasTrivialDestructor()) || - rd->isNonTrivialToPrimitiveDestroy()) - cgm.errorNYI(d.getSourceRange(), "emitAutoVarAlloca: set NRVO flag"); + rd->isNonTrivialToPrimitiveDestroy()) { + // In LLVM: Create a flag that is used to indicate when the NRVO was + // applied to this variable. Set it to zero to indicate that NRVO was + // not applied. 
For now, use the same approach for CIRGen until we can
+ // be sure it's worth doing something more aggressive.
+ cir::ConstantOp falseNRVO = builder.getFalse(loc);
+ Address nrvoFlag = createTempAlloca(falseNRVO.getType(),
+ CharUnits::One(), loc, "nrvo",
+ /*arraySize=*/nullptr);
+ assert(builder.getInsertionBlock());
+ builder.createStore(loc, falseNRVO, nrvoFlag);
+
+ // Record the NRVO flag for this variable.
+ nrvoFlags[&d] = nrvoFlag.getPointer();
+ emission.nrvoFlag = nrvoFlag.getPointer();
+ }
}
} else {
// A normal fixed sized variable becomes an alloca in the entry block,
@@ -131,6 +180,47 @@ bool CIRGenFunction::isTrivialInitializer(const Expr *init) {
return false;
}
+static void emitStoresForConstant(CIRGenModule &cgm, const VarDecl &d,
+ Address addr, bool isVolatile,
+ CIRGenBuilderTy &builder,
+ mlir::TypedAttr constant) {
+ mlir::Type ty = constant.getType();
+ cir::CIRDataLayout layout{cgm.getModule()};
+ uint64_t constantSize = layout.getTypeAllocSize(ty);
+ if (!constantSize)
+ return;
+ assert(!cir::MissingFeatures::addAutoInitAnnotation());
+ assert(!cir::MissingFeatures::vectorConstants());
+ assert(!cir::MissingFeatures::shouldUseBZeroPlusStoresToInitialize());
+ assert(!cir::MissingFeatures::shouldUseMemSetToInitialize());
+ assert(!cir::MissingFeatures::shouldSplitConstantStore());
+ assert(!cir::MissingFeatures::shouldCreateMemCpyFromGlobal());
+ // In CIR we want to emit a store for the whole thing, later lowering
+ // prepare to LLVM should unwrap this into the best policy (see asserts
+ // above).
+ //
+ // FIXME(cir): This is closer to memcpy behavior but less optimal, instead of
+ // copying from a global, we just create a cir.const out of it.
+
+ if (addr.getElementType() != ty)
+ addr = addr.withElementType(builder, ty);
+
+ // If the address is an alloca, set the init attribute.
+ // The address is usually an alloca, but there is at least one case where
+ // emitAutoVarInit is called from the OpenACC codegen with an address that
+ // is not an alloca.
+ auto allocaOp = addr.getDefiningOp<cir::AllocaOp>();
+ if (allocaOp)
+ allocaOp.setInitAttr(mlir::UnitAttr::get(&cgm.getMLIRContext()));
+
+ // There are cases where OpenACC codegen calls emitAutoVarInit with a
+ // temporary decl that doesn't have a source range set.
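A local that takes the single-store path sketched in emitStoresForConstant, for illustration:

    void constInit() {
      // The whole initializer becomes one cir.const feeding one store; the
      // LoweringPrepare pass can later turn it into memset/memcpy as needed.
      int table[4] = {1, 2, 3, 4};
      (void)table;
    }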
+ mlir::Location loc = builder.getUnknownLoc(); + if (d.getSourceRange().isValid()) + loc = cgm.getLoc(d.getSourceRange()); + builder.createStore(loc, builder.getConstant(loc, constant), addr); +} + void CIRGenFunction::emitAutoVarInit( const CIRGenFunction::AutoVarEmission &emission) { assert(emission.variable && "emission was not valid!"); @@ -237,6 +327,9 @@ void CIRGenFunction::emitAutoVarInit( return emitStoreThroughLValue( RValue::get(builder.getConstant(initLoc, typedConstant)), lv); } + + emitStoresForConstant(cgm, d, addr, type.isVolatileQualified(), builder, + typedConstant); } void CIRGenFunction::emitAutoVarCleanups( @@ -361,7 +454,8 @@ CIRGenModule::getOrCreateStaticVarDecl(const VarDecl &d, if (supportsCOMDAT() && gv.isWeakForLinker()) gv.setComdat(true); - assert(!cir::MissingFeatures::opGlobalThreadLocal()); + if (d.getTLSKind()) + errorNYI(d.getSourceRange(), "getOrCreateStaticVarDecl: TLS"); setGVProperties(gv, &d); @@ -447,7 +541,8 @@ cir::GlobalOp CIRGenFunction::addInitializerToStaticVarDecl( bool needsDtor = d.needsDestruction(getContext()) == QualType::DK_cxx_destructor; - assert(!cir::MissingFeatures::opGlobalConstant()); + gv.setConstant(d.getType().isConstantStorage( + getContext(), /*ExcludeCtor=*/true, !needsDtor)); gv.setInitialValueAttr(init); emitter.finalize(gv); @@ -718,21 +813,72 @@ namespace { struct DestroyObject final : EHScopeStack::Cleanup { DestroyObject(Address addr, QualType type, CIRGenFunction::Destroyer *destroyer) - : addr(addr), type(type), destroyer(destroyer) {} + : addr(addr), type(type), destroyer(destroyer) { + assert(!cir::MissingFeatures::useEHCleanupForArray()); + } Address addr; QualType type; CIRGenFunction::Destroyer *destroyer; - void emit(CIRGenFunction &cgf) override { + void emit(CIRGenFunction &cgf, Flags flags) override { + assert(!cir::MissingFeatures::useEHCleanupForArray()); cgf.emitDestroy(addr, type, destroyer); } }; +template <class Derived> struct DestroyNRVOVariable : EHScopeStack::Cleanup { + DestroyNRVOVariable(Address addr, QualType type, mlir::Value nrvoFlag) + : nrvoFlag(nrvoFlag), addr(addr), ty(type) {} + + mlir::Value nrvoFlag; + Address addr; + QualType ty; + + void emit(CIRGenFunction &cgf, Flags flags) override { + // Along the exceptions path we always execute the dtor. + bool nrvo = flags.isForNormalCleanup() && nrvoFlag; + + CIRGenBuilderTy &builder = cgf.getBuilder(); + mlir::OpBuilder::InsertionGuard guard(builder); + if (nrvo) { + // If we exited via NRVO, we skip the destructor call. 
+ mlir::Location loc = addr.getPointer().getLoc(); + mlir::Value didNRVO = builder.createFlagLoad(loc, nrvoFlag); + mlir::Value notNRVO = builder.createNot(didNRVO); + cir::IfOp::create(builder, loc, notNRVO, /*withElseRegion=*/false, + [&](mlir::OpBuilder &b, mlir::Location) { + static_cast<Derived *>(this)->emitDestructorCall(cgf); + builder.createYield(loc); + }); + } else { + static_cast<Derived *>(this)->emitDestructorCall(cgf); + } + } + + virtual ~DestroyNRVOVariable() = default; +}; + +struct DestroyNRVOVariableCXX final + : DestroyNRVOVariable<DestroyNRVOVariableCXX> { + DestroyNRVOVariableCXX(Address addr, QualType type, + const CXXDestructorDecl *dtor, mlir::Value nrvoFlag) + : DestroyNRVOVariable<DestroyNRVOVariableCXX>(addr, type, nrvoFlag), + dtor(dtor) {} + + const CXXDestructorDecl *dtor; + + void emitDestructorCall(CIRGenFunction &cgf) { + cgf.emitCXXDestructorCall(dtor, Dtor_Complete, + /*forVirtualBase=*/false, + /*delegating=*/false, addr, ty); + } +}; + struct CallStackRestore final : EHScopeStack::Cleanup { Address stack; CallStackRestore(Address stack) : stack(stack) {} - void emit(CIRGenFunction &cgf) override { + void emit(CIRGenFunction &cgf, Flags flags) override { mlir::Location loc = stack.getPointer().getLoc(); mlir::Value v = cgf.getBuilder().createLoad(loc, stack); cgf.getBuilder().createStackRestore(loc, v); @@ -834,6 +980,7 @@ void CIRGenFunction::emitDestroy(Address addr, QualType type, return; mlir::Value begin = addr.getPointer(); + assert(!cir::MissingFeatures::useEHCleanupForArray()); emitArrayDestroy(begin, length, type, elementAlign, destroyer); // If the array destroy didn't use the length op, we can erase it. @@ -885,7 +1032,10 @@ void CIRGenFunction::emitAutoVarTypeCleanup( // If there's an NRVO flag on the emission, we need a different // cleanup. if (emission.nrvoFlag) { - cgm.errorNYI(var->getSourceRange(), "emitAutoVarTypeCleanup: NRVO"); + assert(!type->isArrayType()); + CXXDestructorDecl *dtor = type->getAsCXXRecordDecl()->getDestructor(); + ehStack.pushCleanup<DestroyNRVOVariableCXX>(cleanupKind, addr, type, dtor, + emission.nrvoFlag); return; } // Otherwise, this is handled below. 
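A sketch of a function where the NRVO flag matters: only one return statement actually returns the NRVO candidate, so the destructor call pushed above is guarded by the flag (assumed example):

    struct Widget { ~Widget(); };
    Widget make(bool fallback) {
      Widget w;          // NRVO candidate; the "nrvo" flag starts out false
      if (fallback)
        return Widget{}; // w is destroyed normally on this path
      return w;          // flag is set; the guarded cleanup skips ~Widget()
    }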
@@ -903,7 +1053,7 @@ void CIRGenFunction::emitAutoVarTypeCleanup( if (!destroyer) destroyer = getDestroyer(dtorKind); - assert(!cir::MissingFeatures::ehCleanupFlags()); + assert(!cir::MissingFeatures::useEHCleanupForArray()); ehStack.pushCleanup<DestroyObject>(cleanupKind, addr, type, destroyer); } diff --git a/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp index b588a50..8e6a693 100644 --- a/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp @@ -11,24 +11,411 @@ //===----------------------------------------------------------------------===// #include "CIRGenFunction.h" +#include "CIRGenOpenACCHelpers.h" + +#include "mlir/Dialect/OpenACC/OpenACC.h" #include "clang/AST/DeclOpenACC.h" +#include "llvm/Support/SaveAndRestore.h" using namespace clang; using namespace clang::CIRGen; +namespace { +struct OpenACCDeclareCleanup final : EHScopeStack::Cleanup { + mlir::acc::DeclareEnterOp enterOp; + + OpenACCDeclareCleanup(mlir::acc::DeclareEnterOp enterOp) : enterOp(enterOp) {} + + template <typename OutTy, typename InTy> + void createOutOp(CIRGenFunction &cgf, InTy inOp) { + if constexpr (std::is_same_v<OutTy, mlir::acc::DeleteOp>) { + auto outOp = + OutTy::create(cgf.getBuilder(), inOp.getLoc(), inOp, + inOp.getStructured(), inOp.getImplicit(), + llvm::Twine(inOp.getNameAttr()), inOp.getBounds()); + outOp.setDataClause(inOp.getDataClause()); + outOp.setModifiers(inOp.getModifiers()); + } else { + auto outOp = + OutTy::create(cgf.getBuilder(), inOp.getLoc(), inOp, inOp.getVarPtr(), + inOp.getStructured(), inOp.getImplicit(), + llvm::Twine(inOp.getNameAttr()), inOp.getBounds()); + outOp.setDataClause(inOp.getDataClause()); + outOp.setModifiers(inOp.getModifiers()); + } + } + + void emit(CIRGenFunction &cgf, Flags flags) override { + auto exitOp = mlir::acc::DeclareExitOp::create( + cgf.getBuilder(), enterOp.getLoc(), enterOp, {}); + + // Some data clauses need to be referenced in 'exit', AND need to have an + // operation after the exit. Copy these from the enter operation. + for (mlir::Value val : enterOp.getDataClauseOperands()) { + if (auto copyin = val.getDefiningOp<mlir::acc::CopyinOp>()) { + switch (copyin.getDataClause()) { + default: + llvm_unreachable( + "OpenACC local declare clause copyin unexpected data clause"); + break; + case mlir::acc::DataClause::acc_copy: + createOutOp<mlir::acc::CopyoutOp>(cgf, copyin); + break; + case mlir::acc::DataClause::acc_copyin: + createOutOp<mlir::acc::DeleteOp>(cgf, copyin); + break; + } + } else if (auto create = val.getDefiningOp<mlir::acc::CreateOp>()) { + switch (create.getDataClause()) { + default: + llvm_unreachable( + "OpenACC local declare clause create unexpected data clause"); + break; + case mlir::acc::DataClause::acc_copyout: + createOutOp<mlir::acc::CopyoutOp>(cgf, create); + break; + case mlir::acc::DataClause::acc_create: + createOutOp<mlir::acc::DeleteOp>(cgf, create); + break; + } + } else if (auto present = val.getDefiningOp<mlir::acc::PresentOp>()) { + createOutOp<mlir::acc::DeleteOp>(cgf, present); + } else if (auto dev_res = + val.getDefiningOp<mlir::acc::DeclareDeviceResidentOp>()) { + createOutOp<mlir::acc::DeleteOp>(cgf, dev_res); + } else if (val.getDefiningOp<mlir::acc::DeclareLinkOp>()) { + // Link has no exit clauses, and shouldn't be copied. + continue; + } else if (val.getDefiningOp<mlir::acc::DevicePtrOp>()) { + // DevicePtr has no exit clauses, and shouldn't be copied. 
+ continue; + } else { + llvm_unreachable("OpenACC local declare clause unexpected defining op"); + continue; + } + exitOp.getDataClauseOperandsMutable().append(val); + } + } +}; +} // namespace + +void CIRGenModule::emitGlobalOpenACCDecl(const OpenACCConstructDecl *d) { + if (const auto *rd = dyn_cast<OpenACCRoutineDecl>(d)) + emitGlobalOpenACCRoutineDecl(rd); + else + emitGlobalOpenACCDeclareDecl(cast<OpenACCDeclareDecl>(d)); +} + void CIRGenFunction::emitOpenACCDeclare(const OpenACCDeclareDecl &d) { - getCIRGenModule().errorNYI(d.getSourceRange(), "OpenACC Declare Construct"); + mlir::Location exprLoc = cgm.getLoc(d.getBeginLoc()); + auto enterOp = mlir::acc::DeclareEnterOp::create( + builder, exprLoc, mlir::acc::DeclareTokenType::get(&cgm.getMLIRContext()), + {}); + + emitOpenACCClauses(enterOp, OpenACCDirectiveKind::Declare, d.clauses()); + + ehStack.pushCleanup<OpenACCDeclareCleanup>(CleanupKind::NormalCleanup, + enterOp); +} + +// Helper function that gets the declaration referenced by the declare clause. +// This is a simplified verison of the work that `getOpenACCDataOperandInfo` +// does, as it only has to get forms that 'declare' does. +static const Decl *getDeclareReferencedDecl(const Expr *e) { + const Expr *curVarExpr = e->IgnoreParenImpCasts(); + + // Since we allow array sections, we have to unpack the array sections here. + // We don't have to worry about other bounds, since only variable or array + // name (plus array sections as an extension) are permitted. + while (const auto *ase = dyn_cast<ArraySectionExpr>(curVarExpr)) + curVarExpr = ase->getBase()->IgnoreParenImpCasts(); + + if (const auto *dre = dyn_cast<DeclRefExpr>(curVarExpr)) + return dre->getFoundDecl()->getCanonicalDecl(); + + // MemberExpr is allowed when it is implicit 'this'. + return cast<MemberExpr>(curVarExpr)->getMemberDecl()->getCanonicalDecl(); +} + +template <typename BeforeOpTy, typename DataClauseTy> +void CIRGenModule::emitGlobalOpenACCDeclareDataOperands( + const Expr *varOperand, DataClauseTy dataClause, + OpenACCModifierKind modifiers, bool structured, bool implicit, + bool requiresDtor) { + // This is a template argument so that we don't have to include all of + // mlir::acc into CIRGenModule. + static_assert(std::is_same_v<DataClauseTy, mlir::acc::DataClause>); + mlir::Location exprLoc = getLoc(varOperand->getBeginLoc()); + const Decl *refedDecl = getDeclareReferencedDecl(varOperand); + StringRef varName = getMangledName(GlobalDecl{cast<VarDecl>(refedDecl)}); + + // We have to emit two separate functions in this case, an acc_ctor and an + // acc_dtor. These two sections are/should remain reasonably equal, however + // the order of the clauses/vs-enter&exit in them makes combining these two + // sections not particularly attractive, so we have a bit of repetition. + { + mlir::OpBuilder::InsertionGuard guardCase(builder); + auto ctorOp = mlir::acc::GlobalConstructorOp::create( + builder, exprLoc, (varName + "_acc_ctor").str()); + getModule().push_back(ctorOp); + mlir::Block *block = builder.createBlock(&ctorOp.getRegion(), + ctorOp.getRegion().end(), {}, {}); + builder.setInsertionPointToEnd(block); + // These things are close enough to a function handling-wise we can just + // create this here. 
+ CIRGenFunction cgf{*this, builder, true}; + llvm::SaveAndRestore<CIRGenFunction *> savedCGF(curCGF, &cgf); + cgf.curFn = ctorOp; + CIRGenFunction::SourceLocRAIIObject fnLoc{cgf, exprLoc}; + + // This gets the information we need, PLUS emits the bounds correctly, so we + // have to do this in both enter and exit. + CIRGenFunction::OpenACCDataOperandInfo inf = + cgf.getOpenACCDataOperandInfo(varOperand); + auto beforeOp = + BeforeOpTy::create(builder, exprLoc, inf.varValue, structured, implicit, + inf.name, inf.bounds); + beforeOp.setDataClause(dataClause); + beforeOp.setModifiers(convertOpenACCModifiers(modifiers)); + + mlir::acc::DeclareEnterOp::create( + builder, exprLoc, mlir::acc::DeclareTokenType::get(&getMLIRContext()), + beforeOp.getResult()); + + mlir::acc::TerminatorOp::create(builder, exprLoc); + } + + // copyin, create, and device_resident require a destructor, link does not. In + // the case of the first three, they are all a 'getdeviceptr', followed by the + // declare_exit, followed by a delete op in the destructor region. + if (requiresDtor) { + mlir::OpBuilder::InsertionGuard guardCase(builder); + auto ctorOp = mlir::acc::GlobalDestructorOp::create( + builder, exprLoc, (varName + "_acc_dtor").str()); + getModule().push_back(ctorOp); + mlir::Block *block = builder.createBlock(&ctorOp.getRegion(), + ctorOp.getRegion().end(), {}, {}); + builder.setInsertionPointToEnd(block); + + // These things are close enough to a function handling-wise we can just + // create this here. + CIRGenFunction cgf{*this, builder, true}; + llvm::SaveAndRestore<CIRGenFunction *> savedCGF(curCGF, &cgf); + cgf.curFn = ctorOp; + CIRGenFunction::SourceLocRAIIObject fnLoc{cgf, exprLoc}; + + CIRGenFunction::OpenACCDataOperandInfo inf = + cgf.getOpenACCDataOperandInfo(varOperand); + auto getDevPtr = mlir::acc::GetDevicePtrOp::create( + builder, exprLoc, inf.varValue, structured, implicit, inf.name, + inf.bounds); + getDevPtr.setDataClause(dataClause); + getDevPtr.setModifiers(convertOpenACCModifiers(modifiers)); + + mlir::acc::DeclareExitOp::create(builder, exprLoc, /*token=*/mlir::Value{}, + getDevPtr.getResult()); + auto deleteOp = mlir::acc::DeleteOp::create( + builder, exprLoc, getDevPtr, structured, implicit, inf.name, {}); + deleteOp.setDataClause(dataClause); + deleteOp.setModifiers(convertOpenACCModifiers(modifiers)); + mlir::acc::TerminatorOp::create(builder, exprLoc); + } +} +namespace { +// This class emits all of the information for a 'declare' at a global/ns/class +// scope. Each clause results in its own acc_ctor and acc_dtor for the variable. +// This class creates those and emits them properly. +// This behavior is unique/special enough from the emission of statement-level +// clauses that it doesn't really make sense to use that clause visitor. 
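For reference, a file-scope declare of the kind this emitter handles; each clause yields an acc_ctor and, where needed, an acc_dtor for the named variable (variable names are illustrative):

    int big_table[1024];
    #pragma acc declare copyin(big_table) // acc_ctor: copyin + declare_enter;
                                          // acc_dtor: getdeviceptr + declare_exit + delete
    float linked_var;
    #pragma acc declare link(linked_var)  // acc_ctor only; link has no exit clause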
+class OpenACCGlobalDeclareClauseEmitter final + : public OpenACCClauseVisitor<OpenACCGlobalDeclareClauseEmitter> { + CIRGenModule &cgm; + +public: + OpenACCGlobalDeclareClauseEmitter(CIRGenModule &cgm) : cgm(cgm) {} + + void VisitClause(const OpenACCClause &clause) { + llvm_unreachable("Invalid OpenACC clause on global Declare"); + } + + void emitClauses(ArrayRef<const OpenACCClause *> clauses) { + this->VisitClauseList(clauses); + } + + void VisitCopyInClause(const OpenACCCopyInClause &clause) { + for (const Expr *var : clause.getVarList()) + cgm.emitGlobalOpenACCDeclareDataOperands<mlir::acc::CopyinOp>( + var, mlir::acc::DataClause::acc_copyin, clause.getModifierList(), + /*structured=*/true, + /*implicit=*/false, /*requiresDtor=*/true); + } + + void VisitCreateClause(const OpenACCCreateClause &clause) { + for (const Expr *var : clause.getVarList()) + cgm.emitGlobalOpenACCDeclareDataOperands<mlir::acc::CreateOp>( + var, mlir::acc::DataClause::acc_create, clause.getModifierList(), + /*structured=*/true, + /*implicit=*/false, /*requiresDtor=*/true); + } + + void VisitDeviceResidentClause(const OpenACCDeviceResidentClause &clause) { + for (const Expr *var : clause.getVarList()) + cgm.emitGlobalOpenACCDeclareDataOperands< + mlir::acc::DeclareDeviceResidentOp>( + var, mlir::acc::DataClause::acc_declare_device_resident, {}, + /*structured=*/true, + /*implicit=*/false, /*requiresDtor=*/true); + } + + void VisitLinkClause(const OpenACCLinkClause &clause) { + for (const Expr *var : clause.getVarList()) + cgm.emitGlobalOpenACCDeclareDataOperands<mlir::acc::DeclareLinkOp>( + var, mlir::acc::DataClause::acc_declare_link, {}, + /*structured=*/true, + /*implicit=*/false, /*requiresDtor=*/false); + } +}; +} // namespace + +void CIRGenModule::emitGlobalOpenACCDeclareDecl(const OpenACCDeclareDecl *d) { + // Declare creates 1 'acc_ctor' and 0-1 'acc_dtor' per clause, since it needs + // a unique one on a per-variable basis. We can just use a clause emitter to + // do all the work. + mlir::OpBuilder::InsertionGuard guardCase(builder); + OpenACCGlobalDeclareClauseEmitter em{*this}; + em.emitClauses(d->clauses()); } void CIRGenFunction::emitOpenACCRoutine(const OpenACCRoutineDecl &d) { - getCIRGenModule().errorNYI(d.getSourceRange(), "OpenACC Routine Construct"); + // Do nothing here. The OpenACCRoutineDeclAttr handles the implicit name + // cases, and the end-of-TU handling manages the named cases. This is + // necessary because these references aren't necessarily emitted themselves, + // but can be named anywhere. } -void CIRGenModule::emitGlobalOpenACCDecl(const OpenACCConstructDecl *d) { - if (isa<OpenACCRoutineDecl>(d)) - errorNYI(d->getSourceRange(), "OpenACC Routine Construct"); - else if (isa<OpenACCDeclareDecl>(d)) - errorNYI(d->getSourceRange(), "OpenACC Declare Construct"); - else - llvm_unreachable("unknown OpenACC declaration kind?"); +void CIRGenModule::emitGlobalOpenACCRoutineDecl(const OpenACCRoutineDecl *d) { + // Do nothing here. The OpenACCRoutineDeclAttr handles the implicit name + // cases, and the end-of-TU handling manages the named cases. This is + // necessary because these references aren't necessarily emitted themselves, + // but can be named anywhere. 
+} + +namespace { +class OpenACCRoutineClauseEmitter final + : public OpenACCClauseVisitor<OpenACCRoutineClauseEmitter> { + CIRGenModule &cgm; + CIRGen::CIRGenBuilderTy &builder; + mlir::acc::RoutineOp routineOp; + llvm::SmallVector<mlir::acc::DeviceType> lastDeviceTypeValues; + +public: + OpenACCRoutineClauseEmitter(CIRGenModule &cgm, + CIRGen::CIRGenBuilderTy &builder, + mlir::acc::RoutineOp routineOp) + : cgm(cgm), builder(builder), routineOp(routineOp) {} + + void emitClauses(ArrayRef<const OpenACCClause *> clauses) { + this->VisitClauseList(clauses); + } + + void VisitClause(const OpenACCClause &clause) { + llvm_unreachable("Invalid OpenACC clause on routine"); + } + + void VisitSeqClause(const OpenACCSeqClause &clause) { + routineOp.addSeq(builder.getContext(), lastDeviceTypeValues); + } + void VisitWorkerClause(const OpenACCWorkerClause &clause) { + routineOp.addWorker(builder.getContext(), lastDeviceTypeValues); + } + void VisitVectorClause(const OpenACCVectorClause &clause) { + routineOp.addVector(builder.getContext(), lastDeviceTypeValues); + } + + void VisitNoHostClause(const OpenACCNoHostClause &clause) { + routineOp.setNohost(/*attrValue=*/true); + } + + void VisitGangClause(const OpenACCGangClause &clause) { + // Gang has an optional 'dim' value, which is a constant int of 1, 2, or 3. + // If we don't store any expressions in the clause, there are none, else we + // expect there is 1, since Sema should enforce that the single 'dim' is the + // only valid value. + if (clause.getNumExprs() == 0) { + routineOp.addGang(builder.getContext(), lastDeviceTypeValues); + } else { + assert(clause.getNumExprs() == 1); + auto [kind, expr] = clause.getExpr(0); + assert(kind == OpenACCGangKind::Dim); + + llvm::APSInt curValue = expr->EvaluateKnownConstInt(cgm.getASTContext()); + // The value is 1, 2, or 3, but 64 bit seems right enough. + curValue = curValue.sextOrTrunc(64); + routineOp.addGang(builder.getContext(), lastDeviceTypeValues, + curValue.getZExtValue()); + } + } + + void VisitDeviceTypeClause(const OpenACCDeviceTypeClause &clause) { + lastDeviceTypeValues.clear(); + + for (const DeviceTypeArgument &arg : clause.getArchitectures()) + lastDeviceTypeValues.push_back(decodeDeviceType(arg.getIdentifierInfo())); + } + + void VisitBindClause(const OpenACCBindClause &clause) { + if (clause.isStringArgument()) { + mlir::StringAttr value = + builder.getStringAttr(clause.getStringArgument()->getString()); + + routineOp.addBindStrName(builder.getContext(), lastDeviceTypeValues, + value); + } else { + assert(clause.isIdentifierArgument()); + cgm.errorNYI(clause.getSourceRange(), + "Bind with an identifier argument is not yet supported"); + } + } +}; +} // namespace + +void CIRGenModule::emitOpenACCRoutineDecl( + const clang::FunctionDecl *funcDecl, cir::FuncOp func, + SourceLocation pragmaLoc, ArrayRef<const OpenACCClause *> clauses) { + mlir::OpBuilder::InsertionGuard guardCase(builder); + // These need to appear at the global module. + builder.setInsertionPointToEnd(&getModule().getBodyRegion().front()); + + mlir::Location routineLoc = getLoc(pragmaLoc); + + std::stringstream routineNameSS; + // This follows the same naming format as Flang. + routineNameSS << "acc_routine_" << routineCounter++; + std::string routineName = routineNameSS.str(); + + // There isn't a good constructor for RoutineOp that just takes a location + + // name + function, so we use one that creates an otherwise RoutineOp and + // count on the visitor/emitter to fill these in. 
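Routine directives of the shape handled here, including the optional gang dim value and a string bind; the function names are assumed:

    #pragma acc routine gang(dim:2)
    void worker_fn(int *data, int n);

    #pragma acc routine seq bind("device_helper")
    void helper_fn(int x);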
+ auto routineOp = mlir::acc::RoutineOp::create( + builder, routineLoc, routineName, + mlir::SymbolRefAttr::get(builder.getContext(), func.getName()), + /*implicit=*/false); + + // We have to add a pointer going the other direction via an acc.routine_info, + // from the func to the routine. + llvm::SmallVector<mlir::SymbolRefAttr> funcRoutines; + if (auto routineInfo = + func.getOperation()->getAttrOfType<mlir::acc::RoutineInfoAttr>( + mlir::acc::getRoutineInfoAttrName())) + funcRoutines.append(routineInfo.getAccRoutines().begin(), + routineInfo.getAccRoutines().end()); + + funcRoutines.push_back( + mlir::SymbolRefAttr::get(builder.getContext(), routineName)); + func.getOperation()->setAttr( + mlir::acc::getRoutineInfoAttrName(), + mlir::acc::RoutineInfoAttr::get(func.getContext(), funcRoutines)); + + OpenACCRoutineClauseEmitter emitter{*this, builder, routineOp}; + emitter.emitClauses(clauses); } diff --git a/clang/lib/CIR/CodeGen/CIRGenException.cpp b/clang/lib/CIR/CodeGen/CIRGenException.cpp index 67f46ff..3758284 100644 --- a/clang/lib/CIR/CodeGen/CIRGenException.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenException.cpp @@ -14,6 +14,8 @@ #include "CIRGenFunction.h" #include "clang/AST/StmtVisitor.h" +#include "clang/CIR/MissingFeatures.h" +#include "llvm/Support/SaveAndRestore.h" using namespace clang; using namespace clang::CIRGen; @@ -343,7 +345,8 @@ void CIRGenFunction::enterCXXTryStmt(const CXXTryStmt &s, cir::TryOp tryOp, // No exception decl indicates '...', a catch-all. mlir::Region *handler = &tryOp.getHandlerRegions()[i]; - catchScope->setHandler(i, cgm.getCXXABI().getCatchAllTypeInfo(), handler); + catchScope->setHandler(i, cgm.getCXXABI().getCatchAllTypeInfo(), handler, + s.getHandler(i)); // Under async exceptions, catch(...) needs to catch HW exception too // Mark scope with SehTryBegin as a SEH __try scope @@ -382,5 +385,268 @@ void CIRGenFunction::exitCXXTryStmt(const CXXTryStmt &s, bool isFnTryBlock) { return; } - cgm.errorNYI("exitCXXTryStmt: Required catch"); + // Copy the handler blocks off before we pop the EH stack. Emitting + // the handlers might scribble on this memory. + SmallVector<EHCatchScope::Handler> handlers(catchScope.begin(), + catchScope.begin() + numHandlers); + + ehStack.popCatch(); + + // Determine if we need an implicit rethrow for all these catch handlers; + // see the comment below. + bool doImplicitRethrow = + isFnTryBlock && isa<CXXDestructorDecl, CXXConstructorDecl>(curCodeDecl); + + // Wasm uses Windows-style EH instructions, but merges all catch clauses into + // one big catchpad. So we save the old funclet pad here before we traverse + // each catch handler. + if (EHPersonality::get(*this).isWasmPersonality()) { + cgm.errorNYI("exitCXXTryStmt: WASM personality"); + return; + } + + bool hasCatchAll = false; + for (auto &handler : llvm::reverse(handlers)) { + hasCatchAll |= handler.isCatchAll(); + mlir::Region *catchRegion = handler.region; + const CXXCatchStmt *catchStmt = handler.stmt; + + mlir::OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointToStart(&catchRegion->front()); + + // Enter a cleanup scope, including the catch variable and the + // end-catch. + RunCleanupsScope catchScope(*this); + + // Initialize the catch variable and set up the cleanups. + assert(!cir::MissingFeatures::catchParamOp()); + + // Emit the PGO counter increment. + assert(!cir::MissingFeatures::incrementProfileCounter()); + + // Perform the body of the catch. 
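A minimal source-level example of what is being emitted here, using a catch-all handler as set up by the setHandler call above:

    void mayThrow();
    void guarded() {
      try {
        mayThrow();
      } catch (...) { // handler body is emitted into the cir.try handler
                      // region, followed by an implicit cir.yield
      }
    }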
+ [[maybe_unused]] mlir::LogicalResult emitResult = + emitStmt(catchStmt->getHandlerBlock(), /*useCurrentScope=*/true); + assert(emitResult.succeeded() && "failed to emit catch handler block"); + + assert(!cir::MissingFeatures::catchParamOp()); + cir::YieldOp::create(builder, tryOp->getLoc()); + + // [except.handle]p11: + // The currently handled exception is rethrown if control + // reaches the end of a handler of the function-try-block of a + // constructor or destructor. + + // It is important that we only do this on fallthrough and not on + // return. Note that it's illegal to put a return in a + // constructor function-try-block's catch handler (p14), so this + // really only applies to destructors. + if (doImplicitRethrow) { + cgm.errorNYI("exitCXXTryStmt: doImplicitRethrow"); + return; + } + + // Fall out through the catch cleanups. + catchScope.forceCleanup(); + } + + // Because in wasm we merge all catch clauses into one big catchpad, in case + // none of the types in catch handlers matches after we test against each of + // them, we should unwind to the next EH enclosing scope. We generate a call + // to rethrow function here to do that. + if (EHPersonality::get(*this).isWasmPersonality() && !hasCatchAll) { + cgm.errorNYI("exitCXXTryStmt: WASM personality without catch all"); + } + + assert(!cir::MissingFeatures::incrementProfileCounter()); +} + +void CIRGenFunction::populateCatchHandlers(cir::TryOp tryOp) { + assert(ehStack.requiresCatchOrCleanup()); + assert(!cgm.getLangOpts().IgnoreExceptions && + "LandingPad should not be emitted when -fignore-exceptions are in " + "effect."); + + EHScope &innermostEHScope = *ehStack.find(ehStack.getInnermostEHScope()); + switch (innermostEHScope.getKind()) { + case EHScope::Terminate: + cgm.errorNYI("populateCatchHandlers: terminate"); + return; + + case EHScope::Catch: + case EHScope::Cleanup: + case EHScope::Filter: + // CIR does not cache landing pads. + break; + } + + // If there's an existing TryOp, it means we got a `cir.try` scope + // that leads to this "landing pad" creation site. Otherwise, exceptions + // are enabled but a throwing function is called anyways (common pattern + // with function local static initializers). + mlir::ArrayAttr handlerTypesAttr = tryOp.getHandlerTypesAttr(); + if (!handlerTypesAttr || handlerTypesAttr.empty()) { + // Accumulate all the handlers in scope. + bool hasCatchAll = false; + llvm::SmallVector<mlir::Attribute, 4> handlerAttrs; + for (EHScopeStack::iterator i = ehStack.begin(), e = ehStack.end(); i != e; + ++i) { + switch (i->getKind()) { + case EHScope::Cleanup: + cgm.errorNYI("emitLandingPad: Cleanup"); + return; + + case EHScope::Filter: + cgm.errorNYI("emitLandingPad: Filter"); + return; + + case EHScope::Terminate: + cgm.errorNYI("emitLandingPad: Terminate"); + return; + + case EHScope::Catch: + break; + } // end switch + + EHCatchScope &catchScope = cast<EHCatchScope>(*i); + for (const EHCatchScope::Handler &handler : + llvm::make_range(catchScope.begin(), catchScope.end())) { + assert(handler.type.flags == 0 && + "landingpads do not support catch handler flags"); + + // If this is a catch-all, register that and abort. 
+ if (handler.isCatchAll()) { + assert(!hasCatchAll); + hasCatchAll = true; + break; + } + + cgm.errorNYI("emitLandingPad: non catch-all"); + return; + } + + if (hasCatchAll) + break; + } + + if (hasCatchAll) { + handlerAttrs.push_back(cir::CatchAllAttr::get(&getMLIRContext())); + } else { + cgm.errorNYI("emitLandingPad: non catch-all"); + return; + } + + // Add final array of clauses into TryOp. + tryOp.setHandlerTypesAttr( + mlir::ArrayAttr::get(&getMLIRContext(), handlerAttrs)); + } + + // In traditional LLVM codegen. this tells the backend how to generate the + // landing pad by generating a branch to the dispatch block. In CIR, + // this is used to populate blocks for later filing during + // cleanup handling. + populateEHCatchRegions(ehStack.getInnermostEHScope(), tryOp); +} + +// Differently from LLVM traditional codegen, there are no dispatch blocks +// to look at given cir.try_call does not jump to blocks like invoke does. +// However. +void CIRGenFunction::populateEHCatchRegions(EHScopeStack::stable_iterator scope, + cir::TryOp tryOp) { + if (EHPersonality::get(*this).usesFuncletPads()) { + cgm.errorNYI("getEHDispatchBlock: usesFuncletPads"); + return; + } + + // Otherwise, we should look at the actual scope. + EHScope &ehScope = *ehStack.find(scope); + bool mayThrow = ehScope.mayThrow(); + + mlir::Block *originalBlock = nullptr; + if (mayThrow && tryOp) { + // If the dispatch is cached but comes from a different tryOp, make sure: + // - Populate current `tryOp` with a new dispatch block regardless. + // - Update the map to enqueue new dispatchBlock to also get a cleanup. See + // code at the end of the function. + cgm.errorNYI("getEHDispatchBlock: mayThrow & tryOp"); + return; + } + + if (!mayThrow) { + switch (ehScope.getKind()) { + case EHScope::Catch: { + // LLVM does some optimization with branches here, CIR just keep track of + // the corresponding calls. + EHCatchScope &catchScope = cast<EHCatchScope>(ehScope); + if (catchScope.getNumHandlers() == 1 && + catchScope.getHandler(0).isCatchAll()) { + mayThrow = true; + break; + } + cgm.errorNYI("getEHDispatchBlock: mayThrow non-catch all"); + return; + } + case EHScope::Cleanup: { + cgm.errorNYI("getEHDispatchBlock: mayThrow & cleanup"); + return; + } + case EHScope::Filter: { + cgm.errorNYI("getEHDispatchBlock: mayThrow & Filter"); + return; + } + case EHScope::Terminate: { + cgm.errorNYI("getEHDispatchBlock: mayThrow & Terminate"); + return; + } + } + } + + if (originalBlock) { + cgm.errorNYI("getEHDispatchBlock: originalBlock"); + return; + } + + ehScope.setMayThrow(mayThrow); +} + +// in classic codegen this function is mapping to `isInvokeDest` previously and +// currently it's mapping to the conditions that performs early returns in +// `getInvokeDestImpl`, in CIR we need the condition to know if the EH scope may +// throw exception or now. +bool CIRGenFunction::isCatchOrCleanupRequired() { + // If exceptions are disabled/ignored and SEH is not in use, then there is no + // invoke destination. SEH "works" even if exceptions are off. In practice, + // this means that C++ destructors and other EH cleanups don't run, which is + // consistent with MSVC's behavior, except in the presence of -EHa + const LangOptions &lo = cgm.getLangOpts(); + if (!lo.Exceptions || lo.IgnoreExceptions) { + if (!lo.Borland && !lo.MicrosoftExt) + return false; + cgm.errorNYI("isInvokeDest: no exceptions or ignore exception"); + return false; + } + + // CUDA device code doesn't have exceptions. 
+ if (lo.CUDA && lo.CUDAIsDevice) + return false; + + return ehStack.requiresCatchOrCleanup(); +} + +// In classic codegen this function is equivalent to `getInvokeDestImpl`, in +// ClangIR we don't need to return to return any landing pad, we just need to +// populate the catch handlers if they are required +void CIRGenFunction::populateCatchHandlersIfRequired(cir::TryOp tryOp) { + assert(ehStack.requiresCatchOrCleanup()); + assert(!ehStack.empty()); + + assert(!cir::MissingFeatures::setFunctionPersonality()); + + // CIR does not cache landing pads. + const EHPersonality &personality = EHPersonality::get(*this); + if (personality.usesFuncletPads()) { + cgm.errorNYI("getInvokeDestImpl: usesFuncletPads"); + } else { + populateCatchHandlers(tryOp); + } } diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp index 5ccb431..cac046c 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -22,7 +22,11 @@ #include "clang/AST/Decl.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" +#include "clang/Basic/AddressSpaces.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" #include "clang/CIR/MissingFeatures.h" #include <optional> @@ -184,6 +188,7 @@ Address CIRGenFunction::emitPointerWithAlignment(const Expr *expr, case CK_HLSLArrayRValue: case CK_HLSLElementwiseCast: case CK_HLSLVectorTruncation: + case CK_HLSLMatrixTruncation: case CK_IntToOCLSampler: case CK_IntegralCast: case CK_IntegralComplexCast: @@ -277,7 +282,6 @@ static LValue emitGlobalVarDeclLValue(CIRGenFunction &cgf, const Expr *e, QualType t = e->getType(); // If it's thread_local, emit a call to its wrapper function instead. - assert(!cir::MissingFeatures::opGlobalThreadLocal()); if (vd->getTLSKind() == VarDecl::TLS_Dynamic) cgf.cgm.errorNYI(e->getSourceRange(), "emitGlobalVarDeclLValue: thread_local variable"); @@ -311,8 +315,8 @@ static LValue emitGlobalVarDeclLValue(CIRGenFunction &cgf, const Expr *e, void CIRGenFunction::emitStoreOfScalar(mlir::Value value, Address addr, bool isVolatile, QualType ty, - bool isInit, bool isNontemporal) { - assert(!cir::MissingFeatures::opLoadStoreThreadLocal()); + LValueBaseInfo baseInfo, bool isInit, + bool isNontemporal) { if (const auto *clangVecTy = ty->getAs<clang::VectorType>()) { // Boolean vectors use `iN` as storage type. @@ -333,7 +337,13 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, Address addr, value = emitToMemory(value, ty); - assert(!cir::MissingFeatures::opLoadStoreAtomic()); + assert(!cir::MissingFeatures::opLoadStoreTbaa()); + LValue atomicLValue = LValue::makeAddr(addr, ty, baseInfo); + if (ty->isAtomicType() || + (!isInit && isLValueSuitableForInlineAtomic(atomicLValue))) { + emitAtomicStore(RValue::get(value), atomicLValue, isInit); + return; + } // Update the alloca with more info on initialization. 
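As a small illustration of the new branch in emitStoreOfScalar above, a store through an atomic l-value (C11 _Atomic shown) is now routed through emitAtomicStore rather than a plain store:

  _Atomic int counter;

  void set(int v) {
    counter = v;   /* ty->isAtomicType() is true for this l-value */
  }
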
assert(addr.getPointer() && "expected pointer to exist"); @@ -550,13 +560,15 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, LValue lvalue, } emitStoreOfScalar(value, lvalue.getAddress(), lvalue.isVolatile(), - lvalue.getType(), isInit, /*isNontemporal=*/false); + lvalue.getType(), lvalue.getBaseInfo(), isInit, + /*isNontemporal=*/false); } mlir::Value CIRGenFunction::emitLoadOfScalar(Address addr, bool isVolatile, QualType ty, SourceLocation loc, LValueBaseInfo baseInfo) { - assert(!cir::MissingFeatures::opLoadStoreThreadLocal()); + // Traditional LLVM codegen handles thread local separately, CIR handles + // as part of getAddrOfGlobalVar (GetGlobalOp). mlir::Type eltTy = addr.getElementType(); if (const auto *clangVecTy = ty->getAs<clang::VectorType>()) { @@ -619,10 +631,82 @@ RValue CIRGenFunction::emitLoadOfLValue(LValue lv, SourceLocation loc) { lv.getVectorIdx())); } + if (lv.isExtVectorElt()) + return emitLoadOfExtVectorElementLValue(lv); + cgm.errorNYI(loc, "emitLoadOfLValue"); return RValue::get(nullptr); } +int64_t CIRGenFunction::getAccessedFieldNo(unsigned int idx, + const mlir::ArrayAttr elts) { + auto elt = mlir::cast<mlir::IntegerAttr>(elts[idx]); + return elt.getInt(); +} + +// If this is a reference to a subset of the elements of a vector, create an +// appropriate shufflevector. +RValue CIRGenFunction::emitLoadOfExtVectorElementLValue(LValue lv) { + mlir::Location loc = lv.getExtVectorPointer().getLoc(); + mlir::Value vec = builder.createLoad(loc, lv.getExtVectorAddress()); + + // HLSL allows treating scalars as one-element vectors. Converting the scalar + // IR value to a vector here allows the rest of codegen to behave as normal. + if (getLangOpts().HLSL && !mlir::isa<cir::VectorType>(vec.getType())) { + cgm.errorNYI(loc, "emitLoadOfExtVectorElementLValue: HLSL"); + return {}; + } + + const mlir::ArrayAttr elts = lv.getExtVectorElts(); + + // If the result of the expression is a non-vector type, we must be extracting + // a single element. Just codegen as an extractelement. + const auto *exprVecTy = lv.getType()->getAs<clang::VectorType>(); + if (!exprVecTy) { + int64_t indexValue = getAccessedFieldNo(0, elts); + cir::ConstantOp index = + builder.getConstInt(loc, builder.getSInt64Ty(), indexValue); + return RValue::get(cir::VecExtractOp::create(builder, loc, vec, index)); + } + + // Always use shuffle vector to try to retain the original program structure + SmallVector<int64_t> mask; + for (auto i : llvm::seq<unsigned>(0, exprVecTy->getNumElements())) + mask.push_back(getAccessedFieldNo(i, elts)); + + cir::VecShuffleOp resultVec = builder.createVecShuffle(loc, vec, mask); + if (lv.getType()->isExtVectorBoolType()) { + cgm.errorNYI(loc, "emitLoadOfExtVectorElementLValue: ExtVectorBoolType"); + return {}; + } + + return RValue::get(resultVec); +} + +/// Generates lvalue for partial ext_vector access. 
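A sketch of source (not from this patch) that exercises emitLoadOfExtVectorElementLValue: a scalar-result access becomes a single VecExtractOp, while a multi-element swizzle becomes a VecShuffleOp.

  typedef float float4 __attribute__((ext_vector_type(4)));
  typedef float float2 __attribute__((ext_vector_type(2)));

  float2 sample(float4 v, float *out) {
    *out = v.x;    // non-vector result: VecExtractOp on the loaded vector
    return v.xy;   // subset of elements: VecShuffleOp with mask {0, 1}
  }
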
+Address CIRGenFunction::emitExtVectorElementLValue(LValue lv, + mlir::Location loc) { + Address vectorAddress = lv.getExtVectorAddress(); + QualType elementTy = lv.getType()->castAs<VectorType>()->getElementType(); + mlir::Type vectorElementTy = cgm.getTypes().convertType(elementTy); + Address castToPointerElement = + vectorAddress.withElementType(builder, vectorElementTy); + + mlir::ArrayAttr extVecElts = lv.getExtVectorElts(); + unsigned idx = getAccessedFieldNo(0, extVecElts); + mlir::Value idxValue = + builder.getConstInt(loc, mlir::cast<cir::IntType>(ptrDiffTy), idx); + + mlir::Value elementValue = builder.getArrayElement( + loc, loc, castToPointerElement.getPointer(), vectorElementTy, idxValue, + /*shouldDecay=*/false); + + const CharUnits eltSize = getContext().getTypeSizeInChars(elementTy); + const CharUnits alignment = + castToPointerElement.getAlignment().alignmentAtOffset(idx * eltSize); + return Address(elementValue, vectorElementTy, alignment); +} + static cir::FuncOp emitFunctionDeclPointer(CIRGenModule &cgm, GlobalDecl gd) { assert(!cir::MissingFeatures::weakRefReference()); return cgm.getAddrOfFunction(gd); @@ -1021,12 +1105,6 @@ static Address emitArraySubscriptPtr(CIRGenFunction &cgf, LValue CIRGenFunction::emitArraySubscriptExpr(const clang::ArraySubscriptExpr *e) { - if (isa<ExtVectorElementExpr>(e->getBase())) { - cgm.errorNYI(e->getSourceRange(), - "emitArraySubscriptExpr: ExtVectorElementExpr"); - return LValue::makeAddr(Address::invalid(), e->getType(), LValueBaseInfo()); - } - if (getContext().getAsVariableArrayType(e->getType())) { cgm.errorNYI(e->getSourceRange(), "emitArraySubscriptExpr: VariableArrayType"); @@ -1056,15 +1134,30 @@ CIRGenFunction::emitArraySubscriptExpr(const clang::ArraySubscriptExpr *e) { // If the base is a vector type, then we are forming a vector element // with this subscript. - if (e->getBase()->getType()->isVectorType() && + if (e->getBase()->getType()->isSubscriptableVectorType() && !isa<ExtVectorElementExpr>(e->getBase())) { const mlir::Value idx = emitIdxAfterBase(/*promote=*/false); - const LValue lhs = emitLValue(e->getBase()); - return LValue::makeVectorElt(lhs.getAddress(), idx, e->getBase()->getType(), - lhs.getBaseInfo()); + const LValue lv = emitLValue(e->getBase()); + return LValue::makeVectorElt(lv.getAddress(), idx, e->getBase()->getType(), + lv.getBaseInfo()); } const mlir::Value idx = emitIdxAfterBase(/*promote=*/true); + + // Handle the extvector case we ignored above. + if (isa<ExtVectorElementExpr>(e->getBase())) { + const LValue lv = emitLValue(e->getBase()); + Address addr = emitExtVectorElementLValue(lv, cgm.getLoc(e->getExprLoc())); + + QualType elementType = lv.getType()->castAs<VectorType>()->getElementType(); + addr = emitArraySubscriptPtr(*this, cgm.getLoc(e->getBeginLoc()), + cgm.getLoc(e->getEndLoc()), addr, e->getType(), + idx, cgm.getLoc(e->getExprLoc()), + /*shouldDecay=*/false); + + return makeAddrLValue(addr, elementType, lv.getBaseInfo()); + } + if (const Expr *array = getSimpleArrayDecayOperand(e->getBase())) { LValue arrayLV; if (const auto *ase = dyn_cast<ArraySubscriptExpr>(array)) @@ -1108,6 +1201,62 @@ CIRGenFunction::emitArraySubscriptExpr(const clang::ArraySubscriptExpr *e) { return lv; } +LValue CIRGenFunction::emitExtVectorElementExpr(const ExtVectorElementExpr *e) { + // Emit the base vector as an l-value. + LValue base; + + // ExtVectorElementExpr's base can either be a vector or pointer to vector. 
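For the vector-subscript path above, a plain element access is enough to form the vector-element l-value; subscripts whose base is itself an element access take the ExtVectorElementExpr branch handled separately. Illustrative only:

  typedef float float4 __attribute__((ext_vector_type(4)));

  float nth(float4 v, int i) {
    return v[i];   // vector base: LValue::makeVectorElt, then a load of the element
  }
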
+ if (e->isArrow()) { + // If it is a pointer to a vector, emit the address and form an lvalue with + // it. + LValueBaseInfo baseInfo; + Address ptr = emitPointerWithAlignment(e->getBase(), &baseInfo); + const auto *clangPtrTy = + e->getBase()->getType()->castAs<clang::PointerType>(); + base = makeAddrLValue(ptr, clangPtrTy->getPointeeType(), baseInfo); + base.getQuals().removeObjCGCAttr(); + } else if (e->getBase()->isGLValue()) { + // Otherwise, if the base is an lvalue ( as in the case of foo.x.x), + // emit the base as an lvalue. + assert(e->getBase()->getType()->isVectorType()); + base = emitLValue(e->getBase()); + } else { + // Otherwise, the base is a normal rvalue (as in (V+V).x), emit it as such. + assert(e->getBase()->getType()->isVectorType() && + "Result must be a vector"); + mlir::Value vec = emitScalarExpr(e->getBase()); + + // Store the vector to memory (because LValue wants an address). + QualType baseTy = e->getBase()->getType(); + Address vecMem = createMemTemp(baseTy, vec.getLoc(), "tmp"); + if (!getLangOpts().HLSL && baseTy->isExtVectorBoolType()) { + cgm.errorNYI(e->getSourceRange(), + "emitExtVectorElementExpr: ExtVectorBoolType & !HLSL"); + return {}; + } + builder.createStore(vec.getLoc(), vec, vecMem); + base = makeAddrLValue(vecMem, baseTy, AlignmentSource::Decl); + } + + QualType type = + e->getType().withCVRQualifiers(base.getQuals().getCVRQualifiers()); + + // Encode the element access list into a vector of unsigned indices. + SmallVector<uint32_t, 4> indices; + e->getEncodedElementAccess(indices); + + if (base.isSimple()) { + SmallVector<int64_t> attrElts(indices.begin(), indices.end()); + mlir::ArrayAttr elts = builder.getI64ArrayAttr(attrElts); + return LValue::makeExtVectorElt(base.getAddress(), elts, type, + base.getBaseInfo()); + } + + cgm.errorNYI(e->getSourceRange(), + "emitExtVectorElementExpr: isSimple is false"); + return {}; +} + LValue CIRGenFunction::emitStringLiteralLValue(const StringLiteral *e, llvm::StringRef name) { cir::GlobalOp globalOp = cgm.getGlobalForStringLiteral(e, name); @@ -1174,6 +1323,7 @@ LValue CIRGenFunction::emitCastLValue(const CastExpr *e) { case CK_IntegralToFixedPoint: case CK_MatrixCast: case CK_HLSLVectorTruncation: + case CK_HLSLMatrixTruncation: case CK_HLSLArrayRValue: case CK_HLSLElementwiseCast: case CK_HLSLAggregateSplatCast: @@ -1196,8 +1346,6 @@ LValue CIRGenFunction::emitCastLValue(const CastExpr *e) { case CK_NonAtomicToAtomic: case CK_AtomicToNonAtomic: case CK_ToUnion: - case CK_BaseToDerived: - case CK_AddressSpaceConversion: case CK_ObjCObjectLValueCast: case CK_VectorSplat: case CK_ConstructorConversion: @@ -1211,6 +1359,27 @@ LValue CIRGenFunction::emitCastLValue(const CastExpr *e) { return {}; } + case CK_AddressSpaceConversion: { + LValue lv = emitLValue(e->getSubExpr()); + QualType destTy = getContext().getPointerType(e->getType()); + + clang::LangAS srcLangAS = e->getSubExpr()->getType().getAddressSpace(); + cir::TargetAddressSpaceAttr srcAS; + if (clang::isTargetAddressSpace(srcLangAS)) + srcAS = cir::toCIRTargetAddressSpace(getMLIRContext(), srcLangAS); + else + cgm.errorNYI( + e->getSourceRange(), + "emitCastLValue: address space conversion from unknown address " + "space"); + + mlir::Value v = getTargetHooks().performAddrSpaceCast( + *this, lv.getPointer(), srcAS, convertType(destTy)); + + return makeAddrLValue(Address(v, convertTypeForMem(e->getType()), + lv.getAddress().getAlignment()), + e->getType(), lv.getBaseInfo()); + } case CK_LValueBitCast: { // This must be a reinterpret_cast (or 
c-style equivalent). @@ -1263,6 +1432,22 @@ LValue CIRGenFunction::emitCastLValue(const CastExpr *e) { return makeAddrLValue(baseAddr, e->getType(), lv.getBaseInfo()); } + case CK_BaseToDerived: { + const auto *derivedClassDecl = e->getType()->castAsCXXRecordDecl(); + LValue lv = emitLValue(e->getSubExpr()); + + // Perform the base-to-derived conversion + Address derived = getAddressOfDerivedClass( + getLoc(e->getSourceRange()), lv.getAddress(), derivedClassDecl, + e->path(), /*NullCheckValue=*/false); + // C++11 [expr.static.cast]p2: Behavior is undefined if a downcast is + // performed and the object is not of the derived type. + assert(!cir::MissingFeatures::sanitizers()); + + assert(!cir::MissingFeatures::opTBAA()); + return makeAddrLValue(derived, e->getType(), lv.getBaseInfo()); + } + case CK_ZeroToOCLOpaqueType: llvm_unreachable("NULL to OpenCL opaque type lvalue cast is not valid"); } @@ -1630,7 +1815,7 @@ RValue CIRGenFunction::emitAnyExpr(const Expr *e, AggValueSlot aggSlot, bool ignoreResult) { switch (CIRGenFunction::getEvaluationKind(e->getType())) { case cir::TEK_Scalar: - return RValue::get(emitScalarExpr(e)); + return RValue::get(emitScalarExpr(e, ignoreResult)); case cir::TEK_Complex: return RValue::getComplex(emitComplexExpr(e)); case cir::TEK_Aggregate: { @@ -1658,11 +1843,7 @@ CIRGenCallee CIRGenFunction::emitDirectCallee(const GlobalDecl &gd) { const auto *fd = cast<FunctionDecl>(gd.getDecl()); if (unsigned builtinID = fd->getBuiltinID()) { - if (fd->getAttr<AsmLabelAttr>()) { - cgm.errorNYI("AsmLabelAttr"); - } - - StringRef ident = fd->getName(); + StringRef ident = cgm.getMangledName(gd); std::string fdInlineName = (ident + ".inline").str(); bool isPredefinedLibFunction = @@ -1690,8 +1871,7 @@ CIRGenCallee CIRGenFunction::emitDirectCallee(const GlobalDecl &gd) { clone.setLinkageAttr(cir::GlobalLinkageKindAttr::get( &cgm.getMLIRContext(), cir::GlobalLinkageKind::InternalLinkage)); clone.setSymVisibility("private"); - clone.setInlineKindAttr(cir::InlineAttr::get( - &cgm.getMLIRContext(), cir::InlineKind::AlwaysInline)); + clone.setInlineKind(cir::InlineKind::AlwaysInline); } return CIRGenCallee::forDirect(clone, gd); } @@ -2126,79 +2306,6 @@ RValue CIRGenFunction::emitCXXMemberCallExpr(const CXXMemberCallExpr *ce, ce, md, returnValue, hasQualifier, qualifier, isArrow, base); } -void CIRGenFunction::emitCXXConstructExpr(const CXXConstructExpr *e, - AggValueSlot dest) { - assert(!dest.isIgnored() && "Must have a destination!"); - const CXXConstructorDecl *cd = e->getConstructor(); - - // If we require zero initialization before (or instead of) calling the - // constructor, as can be the case with a non-user-provided default - // constructor, emit the zero initialization now, unless destination is - // already zeroed. - if (e->requiresZeroInitialization() && !dest.isZeroed()) { - switch (e->getConstructionKind()) { - case CXXConstructionKind::Delegating: - case CXXConstructionKind::Complete: - emitNullInitialization(getLoc(e->getSourceRange()), dest.getAddress(), - e->getType()); - break; - case CXXConstructionKind::VirtualBase: - case CXXConstructionKind::NonVirtualBase: - cgm.errorNYI(e->getSourceRange(), - "emitCXXConstructExpr: base requires initialization"); - break; - } - } - - // If this is a call to a trivial default constructor, do nothing. 
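The CK_BaseToDerived l-value case added above is what a glvalue downcast produces; a minimal sketch, not taken from the patch:

  struct Base { int b; };
  struct Derived : Base { int d; };

  int readDerived(Base &br) {
    // Downcast of a glvalue: lowered through getAddressOfDerivedClass.
    return static_cast<Derived &>(br).d;
  }
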
- if (cd->isTrivial() && cd->isDefaultConstructor()) - return; - - // Elide the constructor if we're constructing from a temporary - if (getLangOpts().ElideConstructors && e->isElidable()) { - // FIXME: This only handles the simplest case, where the source object is - // passed directly as the first argument to the constructor. This - // should also handle stepping through implicit casts and conversion - // sequences which involve two steps, with a conversion operator - // follwed by a converting constructor. - const Expr *srcObj = e->getArg(0); - assert(srcObj->isTemporaryObject(getContext(), cd->getParent())); - assert( - getContext().hasSameUnqualifiedType(e->getType(), srcObj->getType())); - emitAggExpr(srcObj, dest); - return; - } - - if (const ArrayType *arrayType = getContext().getAsArrayType(e->getType())) { - assert(!cir::MissingFeatures::sanitizers()); - emitCXXAggrConstructorCall(cd, arrayType, dest.getAddress(), e, false); - } else { - - clang::CXXCtorType type = Ctor_Complete; - bool forVirtualBase = false; - bool delegating = false; - - switch (e->getConstructionKind()) { - case CXXConstructionKind::Complete: - type = Ctor_Complete; - break; - case CXXConstructionKind::Delegating: - // We should be emitting a constructor; GlobalDecl will assert this - type = curGD.getCtorType(); - delegating = true; - break; - case CXXConstructionKind::VirtualBase: - forVirtualBase = true; - [[fallthrough]]; - case CXXConstructionKind::NonVirtualBase: - type = Ctor_Base; - break; - } - - emitCXXConstructorCall(cd, type, forVirtualBase, delegating, dest, e); - } -} - RValue CIRGenFunction::emitReferenceBindingToExpr(const Expr *e) { // Emit the expression as an lvalue. LValue lv = emitLValue(e); @@ -2298,6 +2405,8 @@ Address CIRGenFunction::createTempAllocaWithoutCast( /// This creates a alloca and inserts it into the entry block. The alloca is /// casted to default address space if necessary. +// TODO(cir): Implement address space casting to match classic codegen's +// CreateTempAlloca behavior with DestLangAS parameter Address CIRGenFunction::createTempAlloca(mlir::Type ty, CharUnits align, mlir::Location loc, const Twine &name, mlir::Value arraySize, @@ -2312,7 +2421,21 @@ Address CIRGenFunction::createTempAlloca(mlir::Type ty, CharUnits align, // be different from the type defined by the language. For example, // in C++ the auto variables are in the default address space. Therefore // cast alloca to the default address space when necessary. - assert(!cir::MissingFeatures::addressSpace()); + + LangAS allocaAS = alloca.getAddressSpace() + ? 
clang::getLangASFromTargetAS( + alloca.getAddressSpace().getValue().getUInt()) + : clang::LangAS::Default; + LangAS dstTyAS = clang::LangAS::Default; + if (getCIRAllocaAddressSpace()) { + dstTyAS = clang::getLangASFromTargetAS( + getCIRAllocaAddressSpace().getValue().getUInt()); + } + + if (dstTyAS != allocaAS) { + getTargetHooks().performAddrSpaceCast(*this, v, getCIRAllocaAddressSpace(), + builder.getPointerTo(ty, dstTyAS)); + } return Address(v, ty, align); } diff --git a/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp b/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp index 3d3030c..872fc8d 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp @@ -343,8 +343,8 @@ public: cgf.cgm.errorNYI(e->getSourceRange(), "AggExprEmitter: VisitNoInitExpr"); } void VisitCXXDefaultArgExpr(CXXDefaultArgExpr *dae) { - cgf.cgm.errorNYI(dae->getSourceRange(), - "AggExprEmitter: VisitCXXDefaultArgExpr"); + CIRGenFunction::CXXDefaultArgExprScope scope(cgf, dae); + Visit(dae->getExpr()); } void VisitCXXInheritedCtorInitExpr(const CXXInheritedCtorInitExpr *e) { cgf.cgm.errorNYI(e->getSourceRange(), @@ -779,8 +779,8 @@ void AggExprEmitter::visitCXXParenListOrInitListExpr( Expr *e, ArrayRef<Expr *> args, FieldDecl *initializedFieldInUnion, Expr *arrayFiller) { - const AggValueSlot dest = - ensureSlot(cgf.getLoc(e->getSourceRange()), e->getType()); + const mlir::Location loc = cgf.getLoc(e->getSourceRange()); + const AggValueSlot dest = ensureSlot(loc, e->getType()); if (e->getType()->isConstantArrayType()) { cir::ArrayType arrayTy = @@ -819,13 +819,29 @@ void AggExprEmitter::visitCXXParenListOrInitListExpr( if (auto *cxxrd = dyn_cast<CXXRecordDecl>(record)) { assert(numInitElements >= cxxrd->getNumBases() && "missing initializer for base class"); - if (cxxrd->getNumBases() > 0) { - cgf.cgm.errorNYI(e->getSourceRange(), - "visitCXXParenListOrInitListExpr base class init"); - return; + for (auto &base : cxxrd->bases()) { + assert(!base.isVirtual() && "should not see vbases here"); + CXXRecordDecl *baseRD = base.getType()->getAsCXXRecordDecl(); + Address address = cgf.getAddressOfDirectBaseInCompleteClass( + loc, dest.getAddress(), cxxrd, baseRD, + /*baseIsVirtual=*/false); + assert(!cir::MissingFeatures::aggValueSlotGC()); + AggValueSlot aggSlot = AggValueSlot::forAddr( + address, Qualifiers(), AggValueSlot::IsDestructed, + AggValueSlot::IsNotAliased, + cgf.getOverlapForBaseInit(cxxrd, baseRD, false)); + cgf.emitAggExpr(args[curInitIndex++], aggSlot); + if (base.getType().isDestructedType()) { + cgf.cgm.errorNYI(e->getSourceRange(), + "push deferred deactivation cleanup"); + return; + } } } + // Prepare a 'this' for CXXDefaultInitExprs. 
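The base-initializer loop above handles C++17 aggregates with base classes; an illustrative case:

  struct A { int x; };
  struct B : A { int y; };

  B b{{1}, 2};   // {1} initializes the A subobject via
                 // getAddressOfDirectBaseInCompleteClass, then 2 initializes B::y
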
+ CIRGenFunction::FieldConstructionScope fcScope(cgf, dest.getAddress()); + LValue destLV = cgf.makeAddrLValue(dest.getAddress(), e->getType()); if (record->isUnion()) { diff --git a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp index 9dd9b6d..a4acbac 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp @@ -16,6 +16,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/ExprCXX.h" +#include "clang/Basic/OperatorKinds.h" #include "clang/CIR/MissingFeatures.h" using namespace clang; @@ -234,6 +235,127 @@ RValue CIRGenFunction::emitCXXMemberOrOperatorCall( return emitCall(fnInfo, callee, returnValue, args, nullptr, loc); } +static void emitNullBaseClassInitialization(CIRGenFunction &cgf, + Address destPtr, + const CXXRecordDecl *base) { + if (base->isEmpty()) + return; + + const ASTRecordLayout &layout = cgf.getContext().getASTRecordLayout(base); + CharUnits nvSize = layout.getNonVirtualSize(); + + // We cannot simply zero-initialize the entire base sub-object if vbptrs are + // present, they are initialized by the most derived class before calling the + // constructor. + SmallVector<std::pair<CharUnits, CharUnits>, 1> stores; + stores.emplace_back(CharUnits::Zero(), nvSize); + + // Each store is split by the existence of a vbptr. + // TODO(cir): This only needs handling for the MS CXXABI. + assert(!cir::MissingFeatures::msabi()); + + // If the type contains a pointer to data member we can't memset it to zero. + // Instead, create a null constant and copy it to the destination. + // TODO: there are other patterns besides zero that we can usefully memset, + // like -1, which happens to be the pattern used by member-pointers. + // TODO: isZeroInitializable can be over-conservative in the case where a + // virtual base contains a member pointer. + mlir::TypedAttr nullConstantForBase = cgf.cgm.emitNullConstantForBase(base); + if (!cgf.getBuilder().isNullValue(nullConstantForBase)) { + cgf.cgm.errorNYI( + base->getSourceRange(), + "emitNullBaseClassInitialization: base constant is not null"); + } else { + // Otherwise, just memset the whole thing to zero. This is legal + // because in LLVM, all default initializers (other than the ones we just + // handled above) are guaranteed to have a bit pattern of all zeros. + // TODO(cir): When the MS CXXABI is supported, we will need to iterate over + // `stores` and create a separate memset for each one. For now, we know that + // there will only be one store and it will begin at offset zero, so that + // simplifies this code considerably. + assert(stores.size() == 1 && "Expected only one store"); + assert(stores[0].first == CharUnits::Zero() && + "Expected store to begin at offset zero"); + CIRGenBuilderTy builder = cgf.getBuilder(); + mlir::Location loc = cgf.getLoc(base->getBeginLoc()); + builder.createStore(loc, builder.getConstant(loc, nullConstantForBase), + destPtr); + } +} + +void CIRGenFunction::emitCXXConstructExpr(const CXXConstructExpr *e, + AggValueSlot dest) { + assert(!dest.isIgnored() && "Must have a destination!"); + const CXXConstructorDecl *cd = e->getConstructor(); + + // If we require zero initialization before (or instead of) calling the + // constructor, as can be the case with a non-user-provided default + // constructor, emit the zero initialization now, unless destination is + // already zeroed. 
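For reference, value-initialization with a defaulted (non-user-provided) constructor is a typical way requiresZeroInitialization() becomes true; a sketch, illustrative only:

  struct S {
    S() = default;   // not user-provided
    int x;
  };

  S *p = new S();    // value-init: the storage is zero-initialized first,
                     // then the trivial default constructor does nothing
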
+ if (e->requiresZeroInitialization() && !dest.isZeroed()) { + switch (e->getConstructionKind()) { + case CXXConstructionKind::Delegating: + case CXXConstructionKind::Complete: + emitNullInitialization(getLoc(e->getSourceRange()), dest.getAddress(), + e->getType()); + break; + case CXXConstructionKind::VirtualBase: + case CXXConstructionKind::NonVirtualBase: + emitNullBaseClassInitialization(*this, dest.getAddress(), + cd->getParent()); + break; + } + } + + // If this is a call to a trivial default constructor, do nothing. + if (cd->isTrivial() && cd->isDefaultConstructor()) + return; + + // Elide the constructor if we're constructing from a temporary + if (getLangOpts().ElideConstructors && e->isElidable()) { + // FIXME: This only handles the simplest case, where the source object is + // passed directly as the first argument to the constructor. This + // should also handle stepping through implicit casts and conversion + // sequences which involve two steps, with a conversion operator + // follwed by a converting constructor. + const Expr *srcObj = e->getArg(0); + assert(srcObj->isTemporaryObject(getContext(), cd->getParent())); + assert( + getContext().hasSameUnqualifiedType(e->getType(), srcObj->getType())); + emitAggExpr(srcObj, dest); + return; + } + + if (const ArrayType *arrayType = getContext().getAsArrayType(e->getType())) { + assert(!cir::MissingFeatures::sanitizers()); + emitCXXAggrConstructorCall(cd, arrayType, dest.getAddress(), e, false); + } else { + + clang::CXXCtorType type = Ctor_Complete; + bool forVirtualBase = false; + bool delegating = false; + + switch (e->getConstructionKind()) { + case CXXConstructionKind::Complete: + type = Ctor_Complete; + break; + case CXXConstructionKind::Delegating: + // We should be emitting a constructor; GlobalDecl will assert this + type = curGD.getCtorType(); + delegating = true; + break; + case CXXConstructionKind::VirtualBase: + forVirtualBase = true; + [[fallthrough]]; + case CXXConstructionKind::NonVirtualBase: + type = Ctor_Base; + break; + } + + emitCXXConstructorCall(cd, type, forVirtualBase, delegating, dest, e); + } +} + static CharUnits calculateCookiePadding(CIRGenFunction &cgf, const CXXNewExpr *e) { if (!e->isArray()) @@ -305,7 +427,7 @@ static mlir::Value emitCXXNewAllocSize(CIRGenFunction &cgf, const CXXNewExpr *e, const llvm::APInt &count = mlir::cast<cir::IntAttr>(constNumElements).getValue(); - unsigned numElementsWidth = count.getBitWidth(); + [[maybe_unused]] unsigned numElementsWidth = count.getBitWidth(); bool hasAnyOverflow = false; // The equivalent code in CodeGen/CGExprCXX.cpp handles these cases as @@ -527,6 +649,36 @@ static RValue emitNewDeleteCall(CIRGenFunction &cgf, return rv; } +RValue CIRGenFunction::emitNewOrDeleteBuiltinCall(const FunctionProtoType *type, + const CallExpr *callExpr, + OverloadedOperatorKind op) { + CallArgList args; + emitCallArgs(args, type, callExpr->arguments()); + // Find the allocation or deallocation function that we're calling. + ASTContext &astContext = getContext(); + assert(op == OO_New || op == OO_Delete); + DeclarationName name = astContext.DeclarationNames.getCXXOperatorName(op); + + clang::DeclContextLookupResult lookupResult = + astContext.getTranslationUnitDecl()->lookup(name); + for (const auto *decl : lookupResult) { + if (const auto *funcDecl = dyn_cast<FunctionDecl>(decl)) { + if (astContext.hasSameType(funcDecl->getType(), QualType(type, 0))) { + if (sanOpts.has(SanitizerKind::AllocToken)) { + // TODO: Set !alloc_token metadata. 
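Assuming emitNewOrDeleteBuiltinCall mirrors classic codegen's handling of the allocation builtins (an assumption, not stated in this hunk), the translation-unit lookup above resolves calls such as:

  #include <cstddef>

  void *acquire(std::size_t n) {
    return __builtin_operator_new(n);    // found via the TU-scope lookup of 'operator new'
  }

  void release(void *p) {
    __builtin_operator_delete(p);
  }
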
+ assert(!cir::MissingFeatures::allocToken()); + cgm.errorNYI("Alloc token sanitizer not yet supported!"); + } + + // Emit the call to operator new/delete. + return emitNewDeleteCall(*this, funcDecl, type, args); + } + } + } + + llvm_unreachable("predeclared global operator new/delete is missing"); +} + namespace { /// Calls the given 'operator delete' on a single object. struct CallObjectDelete final : EHScopeStack::Cleanup { @@ -538,7 +690,7 @@ struct CallObjectDelete final : EHScopeStack::Cleanup { QualType elementType) : ptr(ptr), operatorDelete(operatorDelete), elementType(elementType) {} - void emit(CIRGenFunction &cgf) override { + void emit(CIRGenFunction &cgf, Flags flags) override { cgf.emitDeleteCall(operatorDelete, ptr, elementType); } }; @@ -654,8 +806,27 @@ mlir::Value CIRGenFunction::emitCXXNewExpr(const CXXNewExpr *e) { Address allocation = Address::invalid(); CallArgList allocatorArgs; if (allocator->isReservedGlobalPlacementOperator()) { - cgm.errorNYI(e->getSourceRange(), - "emitCXXNewExpr: reserved global placement operator"); + // If the allocator is a global placement operator, just + // "inline" it directly. + assert(e->getNumPlacementArgs() == 1); + const Expr *arg = *e->placement_arguments().begin(); + + LValueBaseInfo baseInfo; + allocation = emitPointerWithAlignment(arg, &baseInfo); + + // The pointer expression will, in many cases, be an opaque void*. + // In these cases, discard the computed alignment and use the + // formal alignment of the allocated type. + if (baseInfo.getAlignmentSource() != AlignmentSource::Decl) + allocation = allocation.withAlignment(allocAlign); + + // Set up allocatorArgs for the call to operator delete if it's not + // the reserved global operator. + if (e->getOperatorDelete() && + !e->getOperatorDelete()->isReservedGlobalPlacementOperator()) { + cgm.errorNYI(e->getSourceRange(), + "emitCXXNewExpr: reserved placement new with delete"); + } } else { const FunctionProtoType *allocatorType = allocator->getType()->castAs<FunctionProtoType>(); diff --git a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp index 047f359..d112425 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp @@ -339,7 +339,7 @@ mlir::Value ComplexExprEmitter::emitLoadOfLValue(LValue lv, cgf.cgm.errorNYI(loc, "emitLoadOfLValue with Atomic LV"); const Address srcAddr = lv.getAddress(); - return builder.createLoad(cgf.getLoc(loc), srcAddr); + return builder.createLoad(cgf.getLoc(loc), srcAddr, lv.isVolatileQualified()); } /// EmitStoreOfComplex - Store the specified real/imag parts into the @@ -353,7 +353,7 @@ void ComplexExprEmitter::emitStoreOfComplex(mlir::Location loc, mlir::Value val, } const Address destAddr = lv.getAddress(); - builder.createStore(loc, val, destAddr); + builder.createStore(loc, val, destAddr, lv.isVolatileQualified()); } //===----------------------------------------------------------------------===// @@ -400,8 +400,13 @@ mlir::Value ComplexExprEmitter::VisitCallExpr(const CallExpr *e) { } mlir::Value ComplexExprEmitter::VisitStmtExpr(const StmtExpr *e) { - cgf.cgm.errorNYI(e->getExprLoc(), "ComplexExprEmitter VisitExpr"); - return {}; + CIRGenFunction::StmtExprEvaluation eval(cgf); + Address retAlloca = + cgf.createMemTemp(e->getType(), cgf.getLoc(e->getSourceRange())); + (void)cgf.emitCompoundStmt(*e->getSubStmt(), &retAlloca); + assert(retAlloca.isValid() && "Expected complex return value"); + return emitLoadOfLValue(cgf.makeAddrLValue(retAlloca, 
e->getType()), + e->getExprLoc()); } mlir::Value ComplexExprEmitter::emitComplexToComplexCast(mlir::Value val, @@ -534,6 +539,7 @@ mlir::Value ComplexExprEmitter::emitCast(CastKind ck, Expr *op, case CK_IntegralToFixedPoint: case CK_MatrixCast: case CK_HLSLVectorTruncation: + case CK_HLSLMatrixTruncation: case CK_HLSLArrayRValue: case CK_HLSLElementwiseCast: case CK_HLSLAggregateSplatCast: diff --git a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp index 6af87a0..329fd08 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp @@ -1012,6 +1012,7 @@ public: case CK_MatrixCast: case CK_HLSLArrayRValue: case CK_HLSLVectorTruncation: + case CK_HLSLMatrixTruncation: case CK_HLSLElementwiseCast: case CK_HLSLAggregateSplatCast: return {}; @@ -1521,6 +1522,101 @@ ConstantEmitter::~ConstantEmitter() { "not finalized after being initialized for non-abstract emission"); } +static mlir::TypedAttr emitNullConstantForBase(CIRGenModule &cgm, + mlir::Type baseType, + const CXXRecordDecl *baseDecl); + +static mlir::TypedAttr emitNullConstant(CIRGenModule &cgm, const RecordDecl *rd, + bool asCompleteObject) { + const CIRGenRecordLayout &layout = cgm.getTypes().getCIRGenRecordLayout(rd); + mlir::Type ty = (asCompleteObject ? layout.getCIRType() + : layout.getBaseSubobjectCIRType()); + auto recordTy = mlir::cast<cir::RecordType>(ty); + + unsigned numElements = recordTy.getNumElements(); + SmallVector<mlir::Attribute> elements(numElements); + + auto *cxxrd = dyn_cast<CXXRecordDecl>(rd); + // Fill in all the bases. + if (cxxrd) { + for (const CXXBaseSpecifier &base : cxxrd->bases()) { + if (base.isVirtual()) { + // Ignore virtual bases; if we're laying out for a complete + // object, we'll lay these out later. + continue; + } + + const auto *baseDecl = base.getType()->castAsCXXRecordDecl(); + // Ignore empty bases. + if (isEmptyRecordForLayout(cgm.getASTContext(), base.getType()) || + cgm.getASTContext() + .getASTRecordLayout(baseDecl) + .getNonVirtualSize() + .isZero()) + continue; + + unsigned fieldIndex = layout.getNonVirtualBaseCIRFieldNo(baseDecl); + mlir::Type baseType = recordTy.getElementType(fieldIndex); + elements[fieldIndex] = emitNullConstantForBase(cgm, baseType, baseDecl); + } + } + + // Fill in all the fields. + for (const FieldDecl *field : rd->fields()) { + // Fill in non-bitfields. (Bitfields always use a zero pattern, which we + // will fill in later.) + if (!field->isBitField() && + !isEmptyFieldForLayout(cgm.getASTContext(), field)) { + unsigned fieldIndex = layout.getCIRFieldNo(field); + elements[fieldIndex] = cgm.emitNullConstantAttr(field->getType()); + } + + // For unions, stop after the first named field. + if (rd->isUnion()) { + if (field->getIdentifier()) + break; + if (const auto *fieldRD = field->getType()->getAsRecordDecl()) + if (fieldRD->findFirstNamedDataMember()) + break; + } + } + + // Fill in the virtual bases, if we're working with the complete object. + if (cxxrd && asCompleteObject) { + for ([[maybe_unused]] const CXXBaseSpecifier &vbase : cxxrd->vbases()) { + cgm.errorNYI(vbase.getSourceRange(), "emitNullConstant: virtual base"); + return {}; + } + } + + // Now go through all other fields and zero them out. 
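The pointer-to-data-member caveat above exists because the null member pointer is not an all-zero bit pattern (it is -1 under the Itanium ABI); for example:

  struct HasMemPtr {
    int HasMemPtr::*field;   // null representation is -1, not 0
    int n;
  };

  HasMemPtr zeroed{};        // its null constant cannot be a plain all-zero pattern
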
+ for (unsigned i = 0; i != numElements; ++i) { + if (!elements[i]) { + cgm.errorNYI(rd->getSourceRange(), "emitNullConstant: field not zeroed"); + return {}; + } + } + + mlir::MLIRContext *mlirContext = recordTy.getContext(); + return cir::ConstRecordAttr::get(recordTy, + mlir::ArrayAttr::get(mlirContext, elements)); +} + +/// Emit the null constant for a base subobject. +static mlir::TypedAttr emitNullConstantForBase(CIRGenModule &cgm, + mlir::Type baseType, + const CXXRecordDecl *baseDecl) { + const CIRGenRecordLayout &baseLayout = + cgm.getTypes().getCIRGenRecordLayout(baseDecl); + + // Just zero out bases that don't have any pointer to data members. + if (baseLayout.isZeroInitializableAsBase()) + return cgm.getBuilder().getZeroInitAttr(baseType); + + // Otherwise, we can just use its null constant. + return emitNullConstant(cgm, baseDecl, /*asCompleteObject=*/false); +} + mlir::Attribute ConstantEmitter::tryEmitPrivateForVarInit(const VarDecl &d) { // Make a quick check if variable can be default NULL initialized // and avoid going through rest of code which may do, for c++11, @@ -1820,23 +1916,32 @@ mlir::Attribute ConstantEmitter::tryEmitPrivate(const APValue &value, } mlir::Value CIRGenModule::emitNullConstant(QualType t, mlir::Location loc) { - if (t->getAs<PointerType>()) { - return builder.getNullPtr(getTypes().convertTypeForMem(t), loc); - } + return builder.getConstant(loc, emitNullConstantAttr(t)); +} + +mlir::TypedAttr CIRGenModule::emitNullConstantAttr(QualType t) { + if (t->getAs<PointerType>()) + return builder.getConstNullPtrAttr(getTypes().convertTypeForMem(t)); if (getTypes().isZeroInitializable(t)) - return builder.getNullValue(getTypes().convertTypeForMem(t), loc); + return builder.getZeroInitAttr(getTypes().convertTypeForMem(t)); if (getASTContext().getAsConstantArrayType(t)) { - errorNYI("CIRGenModule::emitNullConstant ConstantArrayType"); + errorNYI("CIRGenModule::emitNullConstantAttr ConstantArrayType"); + return {}; } - if (t->isRecordType()) - errorNYI("CIRGenModule::emitNullConstant RecordType"); + if (const RecordType *rt = t->getAs<RecordType>()) + return ::emitNullConstant(*this, rt->getDecl(), /*asCompleteObject=*/true); assert(t->isMemberDataPointerType() && "Should only see pointers to data members here!"); - errorNYI("CIRGenModule::emitNullConstant unsupported type"); + errorNYI("CIRGenModule::emitNullConstantAttr unsupported type"); return {}; } + +mlir::TypedAttr +CIRGenModule::emitNullConstantForBase(const CXXRecordDecl *record) { + return ::emitNullConstant(*this, record, false); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index 119314f..25ce1ba 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -15,6 +15,7 @@ #include "clang/AST/Expr.h" #include "clang/AST/StmtVisitor.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" #include "clang/CIR/MissingFeatures.h" #include "mlir/IR/Location.h" @@ -78,15 +79,20 @@ struct BinOpInfo { class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> { CIRGenFunction &cgf; CIRGenBuilderTy &builder; + // Unlike classic codegen we set this to false or use std::exchange to read + // the value instead of calling TestAndClearIgnoreResultAssign to make it + // explicit when the value is used bool ignoreResultAssign; public: - ScalarExprEmitter(CIRGenFunction &cgf, CIRGenBuilderTy &builder) - : cgf(cgf), builder(builder) {} + ScalarExprEmitter(CIRGenFunction &cgf, CIRGenBuilderTy &builder, + 
bool ignoreResultAssign = false) + : cgf(cgf), builder(builder), ignoreResultAssign(ignoreResultAssign) {} //===--------------------------------------------------------------------===// // Utilities //===--------------------------------------------------------------------===// + mlir::Type convertType(QualType ty) { return cgf.convertType(ty); } mlir::Value emitComplexToScalarConversion(mlir::Location loc, mlir::Value value, CastKind kind, @@ -133,6 +139,11 @@ public: return {}; } + mlir::Value VisitConstantExpr(ConstantExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), "ScalarExprEmitter: constant expr"); + return {}; + } + mlir::Value VisitPackIndexingExpr(PackIndexingExpr *e) { return Visit(e->getSelectedExpr()); } @@ -150,6 +161,18 @@ public: return cgf.emitLoadOfLValue(lv, e->getExprLoc()).getValue(); } + mlir::Value VisitCoawaitExpr(CoawaitExpr *s) { + return cgf.emitCoawaitExpr(*s).getValue(); + } + mlir::Value VisitCoyieldExpr(CoyieldExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), "ScalarExprEmitter: coyield"); + return {}; + } + mlir::Value VisitUnaryCoawait(const UnaryOperator *e) { + cgf.cgm.errorNYI(e->getSourceRange(), "ScalarExprEmitter: unary coawait"); + return {}; + } + mlir::Value emitLoadOfLValue(LValue lv, SourceLocation loc) { return cgf.emitLoadOfLValue(lv, loc).getValue(); } @@ -162,12 +185,38 @@ public: return emitLoadOfLValue(e); } + mlir::Value VisitAddrLabelExpr(const AddrLabelExpr *e) { + auto func = cast<cir::FuncOp>(cgf.curFn); + cir::BlockAddrInfoAttr blockInfoAttr = cir::BlockAddrInfoAttr::get( + &cgf.getMLIRContext(), func.getSymName(), e->getLabel()->getName()); + cir::BlockAddressOp blockAddressOp = cir::BlockAddressOp::create( + builder, cgf.getLoc(e->getSourceRange()), cgf.convertType(e->getType()), + blockInfoAttr); + cir::LabelOp resolvedLabel = cgf.cgm.lookupBlockAddressInfo(blockInfoAttr); + if (!resolvedLabel) { + cgf.cgm.mapUnresolvedBlockAddress(blockAddressOp); + // Still add the op to maintain insertion order it will be resolved in + // resolveBlockAddresses + cgf.cgm.mapResolvedBlockAddress(blockAddressOp, nullptr); + } else { + cgf.cgm.mapResolvedBlockAddress(blockAddressOp, resolvedLabel); + } + cgf.instantiateIndirectGotoBlock(); + return blockAddressOp; + } + mlir::Value VisitIntegerLiteral(const IntegerLiteral *e) { mlir::Type type = cgf.convertType(e->getType()); return cir::ConstantOp::create(builder, cgf.getLoc(e->getExprLoc()), cir::IntAttr::get(type, e->getValue())); } + mlir::Value VisitFixedPointLiteral(const FixedPointLiteral *e) { + cgf.cgm.errorNYI(e->getSourceRange(), + "ScalarExprEmitter: fixed point literal"); + return {}; + } + mlir::Value VisitFloatingLiteral(const FloatingLiteral *e) { mlir::Type type = cgf.convertType(e->getType()); assert(mlir::isa<cir::FPTypeInterface>(type) && @@ -193,6 +242,29 @@ public: return emitNullValue(e->getType(), cgf.getLoc(e->getSourceRange())); } + mlir::Value VisitGNUNullExpr(const GNUNullExpr *e) { + return emitNullValue(e->getType(), cgf.getLoc(e->getSourceRange())); + } + + mlir::Value VisitOffsetOfExpr(OffsetOfExpr *e); + + mlir::Value VisitSizeOfPackExpr(SizeOfPackExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), "ScalarExprEmitter: size of pack"); + return {}; + } + mlir::Value VisitPseudoObjectExpr(PseudoObjectExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), "ScalarExprEmitter: pseudo object"); + return {}; + } + mlir::Value VisitSYCLUniqueStableNameExpr(SYCLUniqueStableNameExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), + "ScalarExprEmitter: sycl unique stable name"); + 
return {}; + } + mlir::Value VisitEmbedExpr(EmbedExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), "ScalarExprEmitter: embed"); + return {}; + } mlir::Value VisitOpaqueValueExpr(OpaqueValueExpr *e) { if (e->isGLValue()) return emitLoadOfLValue(cgf.getOrCreateOpaqueLValueMapping(e), @@ -202,6 +274,38 @@ public: return cgf.getOrCreateOpaqueRValueMapping(e).getValue(); } + mlir::Value VisitObjCSelectorExpr(ObjCSelectorExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), "ScalarExprEmitter: objc selector"); + return {}; + } + mlir::Value VisitObjCProtocolExpr(ObjCProtocolExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), "ScalarExprEmitter: objc protocol"); + return {}; + } + mlir::Value VisitObjCIVarRefExpr(ObjCIvarRefExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), "ScalarExprEmitter: objc ivar ref"); + return {}; + } + mlir::Value VisitObjCMessageExpr(ObjCMessageExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), "ScalarExprEmitter: objc message"); + return {}; + } + mlir::Value VisitObjCIsaExpr(ObjCIsaExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), "ScalarExprEmitter: objc isa"); + return {}; + } + mlir::Value VisitObjCAvailabilityCheckExpr(ObjCAvailabilityCheckExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), + "ScalarExprEmitter: objc availability check"); + return {}; + } + + mlir::Value VisitMatrixSubscriptExpr(MatrixSubscriptExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), + "ScalarExprEmitter: matrix subscript"); + return {}; + } + mlir::Value VisitCastExpr(CastExpr *e); mlir::Value VisitCallExpr(const CallExpr *e); @@ -221,6 +325,8 @@ public: } mlir::Value VisitArraySubscriptExpr(ArraySubscriptExpr *e) { + ignoreResultAssign = false; + if (e->getBase()->getType()->isVectorType()) { assert(!cir::MissingFeatures::scalableVectors()); @@ -271,6 +377,8 @@ public: e->getSourceRange().getBegin()); } + mlir::Value VisitExtVectorElementExpr(Expr *e) { return emitLoadOfLValue(e); } + mlir::Value VisitMemberExpr(MemberExpr *e); mlir::Value VisitCompoundLiteralExpr(CompoundLiteralExpr *e) { @@ -279,6 +387,18 @@ public: mlir::Value VisitInitListExpr(InitListExpr *e); + mlir::Value VisitArrayInitIndexExpr(ArrayInitIndexExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), + "ScalarExprEmitter: array init index"); + return {}; + } + + mlir::Value VisitImplicitValueInitExpr(const ImplicitValueInitExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), + "ScalarExprEmitter: implicit value init"); + return {}; + } + mlir::Value VisitExplicitCastExpr(ExplicitCastExpr *e) { return VisitCastExpr(e); } @@ -432,6 +552,10 @@ public: return cgf.emitVAArg(ve); } + mlir::Value VisitCXXRewrittenBinaryOperator(CXXRewrittenBinaryOperator *e) { + return Visit(e->getSemanticForm()); + } + mlir::Value VisitUnaryExprOrTypeTraitExpr(const UnaryExprOrTypeTraitExpr *e); mlir::Value VisitAbstractConditionalOperator(const AbstractConditionalOperator *e); @@ -607,11 +731,8 @@ public: } mlir::Value VisitUnaryAddrOf(const UnaryOperator *e) { - if (llvm::isa<MemberPointerType>(e->getType())) { - cgf.cgm.errorNYI(e->getSourceRange(), "Address of member pointer"); - return builder.getNullPtr(cgf.convertType(e->getType()), - cgf.getLoc(e->getExprLoc())); - } + if (llvm::isa<MemberPointerType>(e->getType())) + return cgf.cgm.emitMemberPointerConstant(e); return cgf.emitLValue(e->getSubExpr()).getPointer(); } @@ -682,6 +803,20 @@ public: return Visit(e->getSubExpr()); } + // C++ + mlir::Value VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), + "ScalarExprEmitter: materialize 
temporary"); + return {}; + } + mlir::Value VisitSourceLocExpr(SourceLocExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), "ScalarExprEmitter: source loc"); + return {}; + } + mlir::Value VisitCXXDefaultArgExpr(CXXDefaultArgExpr *dae) { + CIRGenFunction::CXXDefaultArgExprScope scope(cgf, dae); + return Visit(dae->getExpr()); + } mlir::Value VisitCXXDefaultInitExpr(CXXDefaultInitExpr *die) { CIRGenFunction::CXXDefaultInitExprScope scope(cgf, die); return Visit(die->getExpr()); @@ -697,11 +832,43 @@ public: cgf.emitCXXDeleteExpr(e); return {}; } - + mlir::Value VisitTypeTraitExpr(const TypeTraitExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), "ScalarExprEmitter: type trait"); + return {}; + } + mlir::Value + VisitConceptSpecializationExpr(const ConceptSpecializationExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), + "ScalarExprEmitter: concept specialization"); + return {}; + } + mlir::Value VisitRequiresExpr(const RequiresExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), "ScalarExprEmitter: requires"); + return {}; + } + mlir::Value VisitArrayTypeTraitExpr(const ArrayTypeTraitExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), + "ScalarExprEmitter: array type trait"); + return {}; + } + mlir::Value VisitExpressionTraitExpr(const ExpressionTraitExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), + "ScalarExprEmitter: expression trait"); + return {}; + } + mlir::Value VisitCXXPseudoDestructorExpr(const CXXPseudoDestructorExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), + "ScalarExprEmitter: cxx pseudo destructor"); + return {}; + } mlir::Value VisitCXXThrowExpr(const CXXThrowExpr *e) { cgf.emitCXXThrowExpr(e); return {}; } + mlir::Value VisitCXXNoexceptExpr(CXXNoexceptExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), "ScalarExprEmitter: cxx noexcept"); + return {}; + } /// Emit a conversion from the specified type to the specified destination /// type, both of which are CIR scalar types. @@ -839,6 +1006,7 @@ public: BinOpInfo emitBinOps(const BinaryOperator *e, QualType promotionType = QualType()) { + ignoreResultAssign = false; BinOpInfo result; result.lhs = cgf.emitPromotedScalarExpr(e->getLHS(), promotionType); result.rhs = cgf.emitPromotedScalarExpr(e->getRHS(), promotionType); @@ -924,6 +1092,7 @@ public: #undef HANDLEBINOP mlir::Value emitCmp(const BinaryOperator *e) { + ignoreResultAssign = false; const mlir::Location loc = cgf.getLoc(e->getExprLoc()); mlir::Value result; QualType lhsTy = e->getLHS()->getType(); @@ -1163,6 +1332,52 @@ public: return maybePromoteBoolResult(resOp.getResult(), resTy); } + mlir::Value VisitBinPtrMemD(const BinaryOperator *e) { + cgf.cgm.errorNYI(e->getSourceRange(), "ScalarExprEmitter: ptr mem d"); + return {}; + } + + mlir::Value VisitBinPtrMemI(const BinaryOperator *e) { + cgf.cgm.errorNYI(e->getSourceRange(), "ScalarExprEmitter: ptr mem i"); + return {}; + } + + // Other Operators. 
+ mlir::Value VisitBlockExpr(const BlockExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), "ScalarExprEmitter: block"); + return {}; + } + + mlir::Value VisitChooseExpr(ChooseExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), "ScalarExprEmitter: choose"); + return {}; + } + + mlir::Value VisitObjCStringLiteral(const ObjCStringLiteral *e) { + cgf.cgm.errorNYI(e->getSourceRange(), + "ScalarExprEmitter: objc string literal"); + return {}; + } + mlir::Value VisitObjCBoxedExpr(ObjCBoxedExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), "ScalarExprEmitter: objc boxed"); + return {}; + } + mlir::Value VisitObjCArrayLiteral(ObjCArrayLiteral *e) { + cgf.cgm.errorNYI(e->getSourceRange(), + "ScalarExprEmitter: objc array literal"); + return {}; + } + mlir::Value VisitObjCDictionaryLiteral(ObjCDictionaryLiteral *e) { + cgf.cgm.errorNYI(e->getSourceRange(), + "ScalarExprEmitter: objc dictionary literal"); + return {}; + } + + mlir::Value VisitAsTypeExpr(AsTypeExpr *e) { + cgf.cgm.errorNYI(e->getSourceRange(), "ScalarExprEmitter: as type"); + return {}; + } + mlir::Value VisitAtomicExpr(AtomicExpr *e) { return cgf.emitAtomicExpr(e).getValue(); } @@ -1406,11 +1621,13 @@ CIRGenFunction::emitCompoundAssignmentLValue(const CompoundAssignOperator *e) { } /// Emit the computation of the specified expression of scalar type. -mlir::Value CIRGenFunction::emitScalarExpr(const Expr *e) { +mlir::Value CIRGenFunction::emitScalarExpr(const Expr *e, + bool ignoreResultAssign) { assert(e && hasScalarEvaluationKind(e->getType()) && "Invalid scalar expression to emit"); - return ScalarExprEmitter(*this, builder).Visit(const_cast<Expr *>(e)); + return ScalarExprEmitter(*this, builder, ignoreResultAssign) + .Visit(const_cast<Expr *>(e)); } mlir::Value CIRGenFunction::emitPromotedScalarExpr(const Expr *e, @@ -1874,6 +2091,35 @@ mlir::Value ScalarExprEmitter::VisitCastExpr(CastExpr *ce) { return cgf.getBuilder().createBitcast(cgf.getLoc(subExpr->getSourceRange()), src, dstTy); } + case CK_AddressSpaceConversion: { + Expr::EvalResult result; + if (subExpr->EvaluateAsRValue(result, cgf.getContext()) && + result.Val.isNullPointer()) { + // If e has side effect, it is emitted even if its final result is a + // null pointer. In that case, a DCE pass should be able to + // eliminate the useless instructions emitted during translating E. + if (result.HasSideEffects) + Visit(subExpr); + return cgf.cgm.emitNullConstant(destTy, + cgf.getLoc(subExpr->getExprLoc())); + } + + clang::QualType srcTy = subExpr->IgnoreImpCasts()->getType(); + if (srcTy->isPointerType() || srcTy->isReferenceType()) + srcTy = srcTy->getPointeeType(); + + clang::LangAS srcLangAS = srcTy.getAddressSpace(); + cir::TargetAddressSpaceAttr subExprAS; + if (clang::isTargetAddressSpace(srcLangAS)) + subExprAS = cir::toCIRTargetAddressSpace(cgf.getMLIRContext(), srcLangAS); + else + cgf.cgm.errorNYI(subExpr->getSourceRange(), + "non-target address space conversion"); + // Since target may map different address spaces in AST to the same address + // space, an address space conversion may end up as a bitcast. 
+ return cgf.cgm.getTargetCIRGenInfo().performAddrSpaceCast( + cgf, Visit(subExpr), subExprAS, convertType(destTy)); + } case CK_AtomicToNonAtomic: { cgf.getCIRGenModule().errorNYI(subExpr->getSourceRange(), @@ -1919,6 +2165,27 @@ mlir::Value ScalarExprEmitter::VisitCastExpr(CastExpr *ce) { return builder.createIntToPtr(middleVal, destCIRTy); } + case CK_BaseToDerived: { + const CXXRecordDecl *derivedClassDecl = destTy->getPointeeCXXRecordDecl(); + assert(derivedClassDecl && "BaseToDerived arg isn't a C++ object pointer!"); + Address base = cgf.emitPointerWithAlignment(subExpr); + Address derived = cgf.getAddressOfDerivedClass( + cgf.getLoc(ce->getSourceRange()), base, derivedClassDecl, ce->path(), + cgf.shouldNullCheckClassCastValue(ce)); + + // C++11 [expr.static.cast]p11: Behavior is undefined if a downcast is + // performed and the object is not of the derived type. + assert(!cir::MissingFeatures::sanitizers()); + + return cgf.getAsNaturalPointerTo(derived, ce->getType()->getPointeeType()); + } + case CK_UncheckedDerivedToBase: + case CK_DerivedToBase: { + // The EmitPointerWithAlignment path does this fine; just discard + // the alignment. + return cgf.getAsNaturalPointerTo(cgf.emitPointerWithAlignment(ce), + ce->getType()->getPointeeType()); + } case CK_Dynamic: { Address v = cgf.emitPointerWithAlignment(subExpr); const auto *dce = cast<CXXDynamicCastExpr>(ce); @@ -2054,6 +2321,11 @@ mlir::Value ScalarExprEmitter::VisitMemberExpr(MemberExpr *e) { mlir::Value ScalarExprEmitter::VisitInitListExpr(InitListExpr *e) { const unsigned numInitElements = e->getNumInits(); + [[maybe_unused]] const bool ignore = std::exchange(ignoreResultAssign, false); + assert((ignore == false || + (numInitElements == 0 && e->getType()->isVoidType())) && + "init list ignored"); + if (e->hadArrayRangeDesignator()) { cgf.cgm.errorNYI(e->getSourceRange(), "ArrayRangeDesignator"); return {}; @@ -2145,6 +2417,21 @@ mlir::Value ScalarExprEmitter::VisitUnaryLNot(const UnaryOperator *e) { return maybePromoteBoolResult(boolVal, cgf.convertType(e->getType())); } +mlir::Value ScalarExprEmitter::VisitOffsetOfExpr(OffsetOfExpr *e) { + // Try folding the offsetof to a constant. 
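The CK_BaseToDerived pointer case above performs a real address adjustment when the base subobject is not at offset zero; a sketch (a null check is inserted when shouldNullCheckClassCastValue requires it):

  struct A { int a; };
  struct B { int b; };
  struct C : A, B { int c; };

  C *fromB(B *bp) {
    // getAddressOfDerivedClass subtracts the offset of the B subobject within C.
    return static_cast<C *>(bp);
  }
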
+ Expr::EvalResult evalResult; + if (e->EvaluateAsInt(evalResult, cgf.getContext())) { + mlir::Type type = cgf.convertType(e->getType()); + llvm::APSInt value = evalResult.Val.getInt(); + return builder.getConstAPInt(cgf.getLoc(e->getExprLoc()), type, value); + } + + cgf.getCIRGenModule().errorNYI( + e->getSourceRange(), + "ScalarExprEmitter::VisitOffsetOfExpr Can't eval expr as int"); + return {}; +} + mlir::Value ScalarExprEmitter::VisitUnaryReal(const UnaryOperator *e) { QualType promotionTy = getPromotionType(e->getSubExpr()->getType()); mlir::Value result = VisitRealImag(e, promotionTy); @@ -2214,14 +2501,50 @@ mlir::Value ScalarExprEmitter::VisitUnaryExprOrTypeTraitExpr( const QualType typeToSize = e->getTypeOfArgument(); const mlir::Location loc = cgf.getLoc(e->getSourceRange()); if (auto kind = e->getKind(); - kind == UETT_SizeOf || kind == UETT_DataSizeOf) { - if (cgf.getContext().getAsVariableArrayType(typeToSize)) { - cgf.getCIRGenModule().errorNYI(e->getSourceRange(), - "sizeof operator for VariableArrayType", - e->getStmtClassName()); - return builder.getConstant( - loc, cir::IntAttr::get(cgf.cgm.uInt64Ty, - llvm::APSInt(llvm::APInt(64, 1), true))); + kind == UETT_SizeOf || kind == UETT_DataSizeOf || kind == UETT_CountOf) { + if (const VariableArrayType *vat = + cgf.getContext().getAsVariableArrayType(typeToSize)) { + // For _Countof, we only want to evaluate if the extent is actually + // variable as opposed to a multi-dimensional array whose extent is + // constant but whose element type is variable. + bool evaluateExtent = true; + if (kind == UETT_CountOf && vat->getElementType()->isArrayType()) { + evaluateExtent = + !vat->getSizeExpr()->isIntegerConstantExpr(cgf.getContext()); + } + + if (evaluateExtent) { + if (e->isArgumentType()) { + // sizeof(type) - make sure to emit the VLA size. + cgf.emitVariablyModifiedType(typeToSize); + } else { + // C99 6.5.3.4p2: If the argument is an expression of type + // VLA, it is evaluated. + cgf.emitIgnoredExpr(e->getArgumentExpr()); + } + + // For _Countof, we just want to return the size of a single dimension. + if (kind == UETT_CountOf) + return cgf.getVLAElements1D(vat).numElts; + + // For sizeof and __datasizeof, we need to scale the number of elements + // by the size of the array element type. + CIRGenFunction::VlaSizePair vlaSize = cgf.getVLASize(vat); + mlir::Value numElts = vlaSize.numElts; + + // Scale the number of non-VLA elements by the non-VLA element size. + CharUnits eltSize = cgf.getContext().getTypeSizeInChars(vlaSize.type); + if (!eltSize.isOne()) { + mlir::Location loc = cgf.getLoc(e->getSourceRange()); + mlir::Value eltSizeValue = + builder.getConstAPInt(numElts.getLoc(), numElts.getType(), + cgf.cgm.getSize(eltSize).getValue()); + return builder.createMul(loc, eltSizeValue, numElts, + cir::OverflowBehavior::NoUnsignedWrap); + } + + return numElts; + } } } else if (e->getKind() == UETT_OpenMPRequiredSimdAlign) { cgf.getCIRGenModule().errorNYI( @@ -2285,9 +2608,9 @@ mlir::Value ScalarExprEmitter::VisitAbstractConditionalOperator( // type, so evaluating it returns a null Value. However, a conditional // with non-void type must return a non-null Value. 
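A minimal source-level case that reaches the branch below is a conditional whose discarded arm is a throw-expression: visiting that arm yields no value even though the conditional itself has scalar type. Illustrative C++ only, not part of this patch; the function name is made up.

#include <stdexcept>

// The throw arm has type void and produces no scalar result, but the
// conditional as a whole is an int, so the emitter needs a stand-in value
// for that arm (a poison constant after this change) instead of a real
// computation.
int checked(bool ok, int v) {
  return ok ? v : throw std::invalid_argument("bad value");
}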
if (!result && !e->getType()->isVoidType()) { - cgf.cgm.errorNYI(e->getSourceRange(), - "throw expression in conditional operator"); - result = {}; + result = builder.getConstant( + loc, cir::PoisonAttr::get(builder.getContext(), + cgf.convertType(e->getType()))); } return result; diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp index 5d5209b..6b2e60a 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp @@ -16,6 +16,7 @@ #include "CIRGenCall.h" #include "CIRGenValue.h" #include "mlir/IR/Location.h" +#include "clang/AST/Attr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/GlobalDecl.h" #include "clang/CIR/MissingFeatures.h" @@ -412,28 +413,45 @@ void CIRGenFunction::LexicalScope::emitImplicitReturn() { (void)emitReturn(localScope->endLoc); } -void CIRGenFunction::startFunction(GlobalDecl gd, QualType returnType, - cir::FuncOp fn, cir::FuncType funcType, - FunctionArgList args, SourceLocation loc, - SourceLocation startLoc) { - assert(!curFn && - "CIRGenFunction can only be used for one function at a time"); +cir::TryOp CIRGenFunction::LexicalScope::getClosestTryParent() { + LexicalScope *scope = this; + while (scope) { + if (scope->isTry()) + return scope->getTry(); + scope = scope->parentScope; + } + return nullptr; +} - curFn = fn; +/// An argument came in as a promoted argument; demote it back to its +/// declared type. +static mlir::Value emitArgumentDemotion(CIRGenFunction &cgf, const VarDecl *var, + mlir::Value value) { + mlir::Type ty = cgf.convertType(var->getType()); - const Decl *d = gd.getDecl(); + // This can happen with promotions that actually don't change the + // underlying type, like the enum promotions. + if (value.getType() == ty) + return value; - didCallStackSave = false; - curCodeDecl = d; - const auto *fd = dyn_cast_or_null<FunctionDecl>(d); - curFuncDecl = d->getNonClosureContext(); + assert((mlir::isa<cir::IntType>(ty) || cir::isAnyFloatingPointType(ty)) && + "unexpected promotion type"); - prologueCleanupDepth = ehStack.stable_begin(); + if (mlir::isa<cir::IntType>(ty)) + return cgf.getBuilder().CIRBaseBuilderTy::createIntCast(value, ty); - mlir::Block *entryBB = &fn.getBlocks().front(); - builder.setInsertionPointToStart(entryBB); + return cgf.getBuilder().createFloatingCast(value, ty); +} + +void CIRGenFunction::emitFunctionProlog(const FunctionArgList &args, + mlir::Block *entryBB, + const FunctionDecl *fd, + SourceLocation bodyBeginLoc) { + // Naked functions don't have prologues. + if (fd && fd->hasAttr<NakedAttr>()) { + cgm.errorNYI(bodyBeginLoc, "naked function decl"); + } - // TODO(cir): this should live in `emitFunctionProlog // Declare all the function arguments in the symbol table. for (const auto nameValue : llvm::zip(args, entryBB->getArguments())) { const VarDecl *paramVar = std::get<0>(nameValue); @@ -456,20 +474,64 @@ void CIRGenFunction::startFunction(GlobalDecl gd, QualType returnType, cast<ParmVarDecl>(paramVar)->isKNRPromoted(); assert(!cir::MissingFeatures::constructABIArgDirectExtend()); if (isPromoted) - cgm.errorNYI(fd->getSourceRange(), "Function argument demotion"); + paramVal = emitArgumentDemotion(*this, paramVar, paramVal); // Location of the store to the param storage tracked as beginning of // the function body. 
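The argument-demotion path added above is reached for old-style (K&R) C definitions, where a parameter declared as float is passed as double after the default argument promotions and must be narrowed back in the prologue. A hedged illustration in C (pre-C23 only, since the construct does not exist in C++; names made up, not part of this patch):

/* The caller promotes 'x' to double, so the prologue receives a double and
 * emitArgumentDemotion narrows it back to the declared float before the
 * store into the parameter's alloca. */
double scale(x)
    float x;
{
  return x * 2.0;
}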
- mlir::Location fnBodyBegin = getLoc(fd->getBody()->getBeginLoc()); + mlir::Location fnBodyBegin = getLoc(bodyBeginLoc); builder.CIRBaseBuilderTy::createStore(fnBodyBegin, paramVal, addrVal); } assert(builder.getInsertionBlock() && "Should be valid"); +} + +void CIRGenFunction::startFunction(GlobalDecl gd, QualType returnType, + cir::FuncOp fn, cir::FuncType funcType, + FunctionArgList args, SourceLocation loc, + SourceLocation startLoc) { + assert(!curFn && + "CIRGenFunction can only be used for one function at a time"); + + curFn = fn; + + const Decl *d = gd.getDecl(); + + didCallStackSave = false; + curCodeDecl = d; + const auto *fd = dyn_cast_or_null<FunctionDecl>(d); + curFuncDecl = d->getNonClosureContext(); + + prologueCleanupDepth = ehStack.stable_begin(); + + mlir::Block *entryBB = &fn.getBlocks().front(); + builder.setInsertionPointToStart(entryBB); + + // Determine the function body begin location for the prolog. + // If fd is null or has no body, use startLoc as fallback. + SourceLocation bodyBeginLoc = startLoc; + if (fd) { + if (Stmt *body = fd->getBody()) + bodyBeginLoc = body->getBeginLoc(); + else + bodyBeginLoc = fd->getLocation(); + } + + emitFunctionProlog(args, entryBB, fd, bodyBeginLoc); // When the current function is not void, create an address to store the // result value. - if (!returnType->isVoidType()) - emitAndUpdateRetAlloca(returnType, getLoc(fd->getBody()->getEndLoc()), + if (!returnType->isVoidType()) { + // Determine the function body end location. + // If fd is null or has no body, use loc as fallback. + SourceLocation bodyEndLoc = loc; + if (fd) { + if (Stmt *body = fd->getBody()) + bodyEndLoc = body->getEndLoc(); + else + bodyEndLoc = fd->getLocation(); + } + emitAndUpdateRetAlloca(returnType, getLoc(bodyEndLoc), getContext().getTypeAlignInChars(returnType)); + } if (isa_and_nonnull<CXXMethodDecl>(d) && cast<CXXMethodDecl>(d)->isInstance()) { @@ -521,7 +583,48 @@ void CIRGenFunction::startFunction(GlobalDecl gd, QualType returnType, } } +void CIRGenFunction::resolveBlockAddresses() { + for (cir::BlockAddressOp &blockAddress : cgm.unresolvedBlockAddressToLabel) { + cir::LabelOp labelOp = + cgm.lookupBlockAddressInfo(blockAddress.getBlockAddrInfo()); + assert(labelOp && "expected cir.labelOp to already be emitted"); + cgm.updateResolvedBlockAddress(blockAddress, labelOp); + } + cgm.unresolvedBlockAddressToLabel.clear(); +} + +void CIRGenFunction::finishIndirectBranch() { + if (!indirectGotoBlock) + return; + llvm::SmallVector<mlir::Block *> succesors; + llvm::SmallVector<mlir::ValueRange> rangeOperands; + mlir::OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointToEnd(indirectGotoBlock); + for (auto &[blockAdd, labelOp] : cgm.blockAddressToLabel) { + succesors.push_back(labelOp->getBlock()); + rangeOperands.push_back(labelOp->getBlock()->getArguments()); + } + cir::IndirectBrOp::create(builder, builder.getUnknownLoc(), + indirectGotoBlock->getArgument(0), false, + rangeOperands, succesors); + cgm.blockAddressToLabel.clear(); +} + void CIRGenFunction::finishFunction(SourceLocation endLoc) { + // Resolve block address-to-label mappings, then emit the indirect branch + // with the corresponding targets. + resolveBlockAddresses(); + finishIndirectBranch(); + + // If a label address was taken but no indirect goto was used, we can't remove + // the block argument here. 
Instead, we mark the 'indirectbr' op + // as poison so that the cleanup can be deferred to lowering, since the + // verifier doesn't allow the 'indirectbr' target address to be null. + if (indirectGotoBlock && indirectGotoBlock->hasNoPredecessors()) { + auto indrBr = cast<cir::IndirectBrOp>(indirectGotoBlock->front()); + indrBr.setPoison(true); + } + // Pop any cleanups that might have been associated with the // parameters. Do this in whatever block we're currently in; it's // important to do this before we enter the return block or return @@ -560,7 +663,7 @@ static void eraseEmptyAndUnusedBlocks(cir::FuncOp func) { cir::FuncOp CIRGenFunction::generateCode(clang::GlobalDecl gd, cir::FuncOp fn, cir::FuncType funcType) { - const auto funcDecl = cast<FunctionDecl>(gd.getDecl()); + const auto *funcDecl = cast<FunctionDecl>(gd.getDecl()); curGD = gd; if (funcDecl->isInlineBuiltinDeclaration()) { @@ -630,8 +733,13 @@ cir::FuncOp CIRGenFunction::generateCode(clang::GlobalDecl gd, cir::FuncOp fn, { LexicalScope lexScope(*this, fusedLoc, entryBB); + // Emit the standard function prologue. startFunction(gd, retTy, fn, funcType, args, loc, bodyRange.getBegin()); + // Save parameters for coroutine function. + if (body && isa_and_nonnull<CoroutineBodyStmt>(body)) + llvm::append_range(fnArgs, funcDecl->parameters()); + if (isa<CXXDestructorDecl>(funcDecl)) { emitDestructorBody(args); } else if (isa<CXXConstructorDecl>(funcDecl)) { @@ -652,6 +760,7 @@ cir::FuncOp CIRGenFunction::generateCode(clang::GlobalDecl gd, cir::FuncOp fn, // copy-constructors. emitImplicitAssignmentOperatorBody(args); } else if (body) { + // Emit standard function body. if (mlir::failed(emitFunctionBody(body))) { return nullptr; } @@ -679,6 +788,8 @@ void CIRGenFunction::emitConstructorBody(FunctionArgList &args) { ctorType == Ctor_Complete) && "can only generate complete ctor for this ABI"); + cgm.setCXXSpecialMemberAttr(cast<cir::FuncOp>(curFn), ctor); + if (ctorType == Ctor_Complete && isConstructorDelegationValid(ctor) && cgm.getTarget().getCXXABI().hasConstructorVariants()) { emitDelegateCXXConstructorCall(ctor, Ctor_Base, args, ctor->getEndLoc()); @@ -717,6 +828,8 @@ void CIRGenFunction::emitDestructorBody(FunctionArgList &args) { const CXXDestructorDecl *dtor = cast<CXXDestructorDecl>(curGD.getDecl()); CXXDtorType dtorType = curGD.getDtorType(); + cgm.setCXXSpecialMemberAttr(cast<cir::FuncOp>(curFn), dtor); + // For an abstract class, non-base destructors are never used (and can't // be emitted in general, because vbase dtors may not have been validated // by Sema), but the Itanium ABI doesn't make them optional and Clang may @@ -883,6 +996,8 @@ LValue CIRGenFunction::emitLValue(const Expr *e) { return emitConditionalOperatorLValue(cast<BinaryConditionalOperator>(e)); case Expr::ArraySubscriptExprClass: return emitArraySubscriptExpr(cast<ArraySubscriptExpr>(e)); + case Expr::ExtVectorElementExprClass: + return emitExtVectorElementExpr(cast<ExtVectorElementExpr>(e)); case Expr::UnaryOperatorClass: return emitUnaryOpLValue(cast<UnaryOperator>(e)); case Expr::StringLiteralClass: @@ -912,6 +1027,18 @@ LValue CIRGenFunction::emitLValue(const Expr *e) { case Expr::CXXOperatorCallExprClass: case Expr::UserDefinedLiteralClass: return emitCallExprLValue(cast<CallExpr>(e)); + case Expr::ExprWithCleanupsClass: { + const auto *cleanups = cast<ExprWithCleanups>(e); + RunCleanupsScope scope(*this); + LValue lv = emitLValue(cleanups->getSubExpr()); + assert(!cir::MissingFeatures::cleanupWithPreservedValues()); + return lv; + } + case 
Expr::CXXDefaultArgExprClass: { + auto *dae = cast<CXXDefaultArgExpr>(e); + CXXDefaultArgExprScope scope(*this, dae); + return emitLValue(dae->getExpr()); + } case Expr::ParenExprClass: return emitLValue(cast<ParenExpr>(e)->getSubExpr()); case Expr::GenericSelectionExprClass: @@ -1052,6 +1179,17 @@ CIRGenFunction::emitArrayLength(const clang::ArrayType *origArrayType, return builder.getConstInt(*currSrcLoc, sizeTy, countFromCLAs); } +void CIRGenFunction::instantiateIndirectGotoBlock() { + // If we already made the indirect branch for indirect goto, return its block. + if (indirectGotoBlock) + return; + + mlir::OpBuilder::InsertionGuard guard(builder); + indirectGotoBlock = + builder.createBlock(builder.getBlock()->getParent(), {}, {voidPtrTy}, + {builder.getUnknownLoc()}); +} + mlir::Value CIRGenFunction::emitAlignmentAssumption( mlir::Value ptrValue, QualType ty, SourceLocation loc, SourceLocation assumptionLoc, int64_t alignment, mlir::Value offsetValue) { @@ -1104,6 +1242,14 @@ CIRGenFunction::getVLASize(const VariableArrayType *type) { return {numElements, elementType}; } +CIRGenFunction::VlaSizePair +CIRGenFunction::getVLAElements1D(const VariableArrayType *vla) { + mlir::Value vlaSize = vlaSizeMap[vla->getSizeExpr()]; + assert(vlaSize && "no size for VLA!"); + assert(vlaSize.getType() == sizeTy); + return {vlaSize, vla->getElementType()}; +} + // TODO(cir): Most of this function can be shared between CIRGen // and traditional LLVM codegen void CIRGenFunction::emitVariablyModifiedType(QualType type) { diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index e5cecaa5..15322ee 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -30,6 +30,7 @@ #include "clang/AST/ExprCXX.h" #include "clang/AST/Stmt.h" #include "clang/AST/Type.h" +#include "clang/Basic/OperatorKinds.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" #include "clang/CIR/MissingFeatures.h" #include "clang/CIR/TypeEvaluationKind.h" @@ -122,6 +123,10 @@ public: GlobalDecl curSEHParent; + /// A mapping from NRVO variables to the flags used to indicate + /// when the NRVO has been applied to this variable. + llvm::DenseMap<const VarDecl *, mlir::Value> nrvoFlags; + llvm::DenseMap<const clang::ValueDecl *, clang::FieldDecl *> lambdaCaptureFields; clang::FieldDecl *lambdaThisCaptureField = nullptr; @@ -152,6 +157,9 @@ public: /// global initializers. mlir::Operation *curFn = nullptr; + /// Save Parameter Decl for coroutine. + llvm::SmallVector<const ParmVarDecl *> fnArgs; + using DeclMapTy = llvm::DenseMap<const clang::Decl *, Address>; /// This keeps track of the CIR allocas or globals for local C /// declarations. @@ -199,6 +207,22 @@ public: return convertType(getContext().getTypeDeclType(t)); } + /// Get integer from a mlir::Value that is an int constant or a constant op. + static int64_t getSExtIntValueFromConstOp(mlir::Value val) { + auto constOp = val.getDefiningOp<cir::ConstantOp>(); + assert(constOp && "getIntValueFromConstOp call with non ConstantOp"); + return constOp.getIntValue().getSExtValue(); + } + + /// Get zero-extended integer from a mlir::Value that is an int constant or a + /// constant op. + static int64_t getZExtIntValueFromConstOp(mlir::Value val) { + auto constOp = val.getDefiningOp<cir::ConstantOp>(); + assert(constOp && + "getZeroExtendedIntValueFromConstOp call with non ConstantOp"); + return constOp.getIntValue().getZExtValue(); + } + /// Return the cir::TypeEvaluationKind of QualType \c type. 
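For reference, the indirect-goto machinery above (instantiateIndirectGotoBlock, the deferred cir.indirectbr, and the blockaddress-to-label maps) is driven by the GNU labels-as-values extension. A hedged illustration, names made up, not part of this patch:

// Each &&label becomes a blockaddress that must later resolve to the label's
// block; 'goto *p' funnels through the single indirect-goto block, and the
// cir.indirectbr emitted at the end of the function lists every
// address-taken label as a possible successor.
void dispatch(int op) {
  static void *table[] = { &&add_one, &&done };
  goto *table[op];
add_one:
  op += 1;
done:
  return;
}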
static cir::TypeEvaluationKind getEvaluationKind(clang::QualType type); @@ -219,6 +243,10 @@ public: const TargetInfo &getTarget() const { return cgm.getTarget(); } mlir::MLIRContext &getMLIRContext() { return cgm.getMLIRContext(); } + const TargetCIRGenInfo &getTargetHooks() const { + return cgm.getTargetCIRGenInfo(); + } + // --------------------- // Opaque value handling // --------------------- @@ -490,6 +518,10 @@ public: VlaSizePair(mlir::Value num, QualType ty) : numElts(num), type(ty) {} }; + /// Return the number of elements for a single dimension + /// for the given array type. + VlaSizePair getVLAElements1D(const VariableArrayType *vla); + /// Returns an MLIR::Value+QualType pair that corresponds to the size, /// in non-variably-sized elements, of a variable length array type, /// plus that largest non-variably-sized element type. Assumes that @@ -497,6 +529,12 @@ public: VlaSizePair getVLASize(const VariableArrayType *type); VlaSizePair getVLASize(QualType type); + Address getAsNaturalAddressOf(Address addr, QualType pointeeTy); + + mlir::Value getAsNaturalPointerTo(Address addr, QualType pointeeType) { + return getAsNaturalAddressOf(addr, pointeeType).getBasePointer(); + } + void finishFunction(SourceLocation endLoc); /// Determine whether the given initializer is trivial in the sense @@ -517,6 +555,8 @@ public: /// that we can just remove the code. bool containsLabel(const clang::Stmt *s, bool ignoreCaseStmts = false); + Address emitExtVectorElementLValue(LValue lv, mlir::Location loc); + class ConstantEmission { // Cannot use mlir::TypedAttr directly here because of bit availability. llvm::PointerIntPair<mlir::Attribute, 1, bool> valueAndIsReference; @@ -618,6 +658,14 @@ public: return JumpDest(target, ehStack.getInnermostNormalCleanup(), nextCleanupDestIndex++); } + /// IndirectBranch - The first time an indirect goto is seen we create a block + /// reserved for the indirect branch. Unlike before,the actual 'indirectbr' + /// is emitted at the end of the function, once all block destinations have + /// been resolved. + mlir::Block *indirectGotoBlock = nullptr; + + void resolveBlockAddresses(); + void finishIndirectBranch(); /// Perform the usual unary conversions on the specified expression and /// compare the result against zero, returning an Int1Ty value. @@ -810,6 +858,11 @@ public: llvm::iterator_range<CastExpr::path_const_iterator> path, bool nullCheckValue, SourceLocation loc); + Address getAddressOfDerivedClass( + mlir::Location loc, Address baseAddr, const CXXRecordDecl *derived, + llvm::iterator_range<CastExpr::path_const_iterator> path, + bool nullCheckValue); + /// Return the VTT parameter that should be passed to a base /// constructor/destructor with virtual bases. /// FIXME: VTTs are Itanium ABI-specific, so the definition should move @@ -901,6 +954,11 @@ public: clang::QualType buildFunctionArgList(clang::GlobalDecl gd, FunctionArgList &args); + /// Emit the function prologue: declare function arguments in the symbol + /// table. + void emitFunctionProlog(const FunctionArgList &args, mlir::Block *entryBB, + const FunctionDecl *fd, SourceLocation bodyBeginLoc); + /// Emit code for the start of a function. /// \param loc The location to be associated with the function. /// \param startLoc The location of the function body. @@ -916,10 +974,16 @@ public: return false; } + void populateEHCatchRegions(EHScopeStack::stable_iterator scope, + cir::TryOp tryOp); + /// The cleanup depth enclosing all the cleanups associated with the /// parameters. 
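The ext-vector hooks added to CIRGenFunction.h in this patch (emitExtVectorElementExpr, emitExtVectorElementLValue, emitLoadOfExtVectorElementLValue) correspond to Clang's ext_vector_type element and swizzle accesses. A hedged C++ illustration; typedef and function names are made up, not part of this patch:

typedef float float4 __attribute__((ext_vector_type(4)));
typedef float float2 __attribute__((ext_vector_type(2)));

// 'v.x' used as an l-value and 'v.xy' as a swizzle read are both
// ExtVectorElementExprs rather than ordinary member accesses.
float2 head(float4 v) {
  v.x = 1.0f;    // single element as an l-value
  return v.xy;   // two-element swizzle
}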
EHScopeStack::stable_iterator prologueCleanupDepth; + bool isCatchOrCleanupRequired(); + void populateCatchHandlersIfRequired(cir::TryOp tryOp); + /// Takes the old cleanup stack size and emits the cleanup blocks /// that have been added. void popCleanupBlocks(EHScopeStack::stable_iterator oldCleanupStackDepth); @@ -1063,7 +1127,7 @@ public: bool isSwitch() { return scopeKind == Kind::Switch; } bool isTernary() { return scopeKind == Kind::Ternary; } bool isTry() { return scopeKind == Kind::Try; } - + cir::TryOp getClosestTryParent(); void setAsGlobalInit() { scopeKind = Kind::GlobalInit; } void setAsSwitch() { scopeKind = Kind::Switch; } void setAsTernary() { scopeKind = Kind::Ternary; } @@ -1189,6 +1253,14 @@ public: /// CIR emit functions /// ---------------------- public: + mlir::Value emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, + ReturnValueSlot returnValue, + llvm::Triple::ArchType arch); + mlir::Value emitAArch64SMEBuiltinExpr(unsigned builtinID, + const CallExpr *expr); + mlir::Value emitAArch64SVEBuiltinExpr(unsigned builtinID, + const CallExpr *expr); + mlir::Value emitAlignmentAssumption(mlir::Value ptrValue, QualType ty, SourceLocation loc, SourceLocation assumptionLoc, @@ -1264,6 +1336,8 @@ public: QualType &baseType, Address &addr); LValue emitArraySubscriptExpr(const clang::ArraySubscriptExpr *e); + LValue emitExtVectorElementExpr(const ExtVectorElementExpr *e); + Address emitArrayToPointerDecay(const Expr *e, LValueBaseInfo *baseInfo = nullptr); @@ -1271,6 +1345,9 @@ public: RValue emitAtomicExpr(AtomicExpr *e); void emitAtomicInit(Expr *init, LValue dest); + void emitAtomicStore(RValue rvalue, LValue dest, bool isInit); + void emitAtomicStore(RValue rvalue, LValue dest, cir::MemOrder order, + bool isVolatile, bool isInit); AutoVarEmission emitAutoVarAlloca(const clang::VarDecl &d, mlir::OpBuilder::InsertPoint ip = {}); @@ -1304,6 +1381,32 @@ public: RValue emitBuiltinExpr(const clang::GlobalDecl &gd, unsigned builtinID, const clang::CallExpr *e, ReturnValueSlot returnValue); + /// Returns a Value corresponding to the size of the given expression by + /// emitting a `cir.objsize` operation. + /// + /// \param e The expression whose object size to compute + /// \param type Determines the semantics of the object size computation. + /// The type parameter is a 2-bit value where: + /// bit 0 (type & 1): 0 = whole object, 1 = closest subobject + /// bit 1 (type & 2): 0 = maximum size, 2 = minimum size + /// \param resType The result type for the size value + /// \param emittedE Optional pre-emitted pointer value. If non-null, we'll + /// call `cir.objsize` on this value rather than emitting e. 
+ /// \param isDynamic If true, allows runtime evaluation via dynamic mode + mlir::Value emitBuiltinObjectSize(const clang::Expr *e, unsigned type, + cir::IntType resType, mlir::Value emittedE, + bool isDynamic); + + mlir::Value evaluateOrEmitBuiltinObjectSize(const clang::Expr *e, + unsigned type, + cir::IntType resType, + mlir::Value emittedE, + bool isDynamic); + + int64_t getAccessedFieldNo(unsigned idx, mlir::ArrayAttr elts); + + void instantiateIndirectGotoBlock(); + RValue emitCall(const CIRGenFunctionInfo &funcInfo, const CIRGenCallee &callee, ReturnValueSlot returnValue, const CallArgList &args, cir::CIRCallOpInterface *callOp, @@ -1362,6 +1465,7 @@ public: cir::CallOp emitCoroAllocBuiltinCall(mlir::Location loc); cir::CallOp emitCoroBeginBuiltinCall(mlir::Location loc, mlir::Value coroframeAddr); + RValue emitCoroutineFrame(); void emitDestroy(Address addr, QualType type, Destroyer *destroyer); @@ -1434,6 +1538,10 @@ public: RValue emitCXXPseudoDestructorExpr(const CXXPseudoDestructorExpr *expr); + RValue emitNewOrDeleteBuiltinCall(const FunctionProtoType *type, + const CallExpr *callExpr, + OverloadedOperatorKind op); + void emitCXXTemporary(const CXXTemporary *temporary, QualType tempType, Address ptr); @@ -1482,6 +1590,8 @@ public: mlir::LogicalResult emitGotoStmt(const clang::GotoStmt &s); + mlir::LogicalResult emitIndirectGotoStmt(const IndirectGotoStmt &s); + void emitImplicitAssignmentOperatorBody(FunctionArgList &args); void emitInitializerForField(clang::FieldDecl *field, LValue lhs, @@ -1501,7 +1611,8 @@ public: llvm::ArrayRef<mlir::Value> args = {}); /// Emit the computation of the specified expression of scalar type. - mlir::Value emitScalarExpr(const clang::Expr *e); + mlir::Value emitScalarExpr(const clang::Expr *e, + bool ignoreResultAssign = false); mlir::Value emitScalarPrePostIncDec(const UnaryOperator *e, LValue lv, cir::UnaryOpKind kind, bool isPre); @@ -1522,6 +1633,9 @@ public: void emitForwardingCallToLambda(const CXXMethodDecl *lambdaCallOperator, CallArgList &callArgs); + RValue emitCoawaitExpr(const CoawaitExpr &e, + AggValueSlot aggSlot = AggValueSlot::ignored(), + bool ignoreResult = false); /// Emit the computation of the specified expression of complex type, /// returning the result. mlir::Value emitComplexExpr(const Expr *e); @@ -1587,6 +1701,8 @@ public: void emitLambdaDelegatingInvokeBody(const CXXMethodDecl *md); void emitLambdaStaticInvokeBody(const CXXMethodDecl *md); + void populateCatchHandlers(cir::TryOp tryOp); + mlir::LogicalResult emitIfStmt(const clang::IfStmt &s); /// Emit code to compute the specified expression, @@ -1598,6 +1714,8 @@ public: /// Load a complex number from the specified l-value. mlir::Value emitLoadOfComplex(LValue src, SourceLocation loc); + RValue emitLoadOfExtVectorElementLValue(LValue lv); + /// Given an expression that represents a value lvalue, this method emits /// the address of the lvalue, then loads the result as an rvalue, /// returning the rvalue. 
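A hedged illustration of the 'type' bit encoding documented for emitBuiltinObjectSize above; the struct and function names are made up, and the byte counts assume the obvious layout with no padding. Not part of this patch:

#include <cstddef>

struct Packet {
  char header[8];
  char payload[56];
};

// Bit 0 picks the enclosing object: with an otherwise unknown 'p', type 0
// measures from 'header' to the end of the whole Packet (64 bytes), while
// type 1 stops at the end of the 'header' subobject (8 bytes). Setting
// bit 1 (types 2 and 3) asks for a minimum instead of a maximum.
std::size_t demo(Packet *p) {
  std::size_t to_end_of_object = __builtin_object_size(p->header, 0);
  std::size_t header_only      = __builtin_object_size(p->header, 1);
  return to_end_of_object + header_only;
}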
@@ -1673,14 +1791,17 @@ public: void emitScalarInit(const clang::Expr *init, mlir::Location loc, LValue lvalue, bool capturedByInit = false); + mlir::Value emitScalarOrConstFoldImmArg(unsigned iceArguments, unsigned idx, + const Expr *argExpr); + void emitStaticVarDecl(const VarDecl &d, cir::GlobalLinkageKind linkage); void emitStoreOfComplex(mlir::Location loc, mlir::Value v, LValue dest, bool isInit); void emitStoreOfScalar(mlir::Value value, Address addr, bool isVolatile, - clang::QualType ty, bool isInit = false, - bool isNontemporal = false); + clang::QualType ty, LValueBaseInfo baseInfo, + bool isInit = false, bool isNontemporal = false); void emitStoreOfScalar(mlir::Value value, LValue lvalue, bool isInit); /// Store the specified rvalue into the specified @@ -1740,7 +1861,7 @@ public: mlir::LogicalResult emitWhileStmt(const clang::WhileStmt &s); - mlir::Value emitX86BuiltinExpr(unsigned builtinID, const CallExpr *e); + mlir::Value emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr); /// Given an assignment `*lhs = rhs`, emit a test that checks if \p rhs is /// nonnull, if 1\p LHS is marked _Nonnull. @@ -1899,25 +2020,23 @@ public: private: template <typename Op> Op emitOpenACCOp(mlir::Location start, OpenACCDirectiveKind dirKind, - SourceLocation dirLoc, llvm::ArrayRef<const OpenACCClause *> clauses); // Function to do the basic implementation of an operation with an Associated // Statement. Models AssociatedStmtConstruct. template <typename Op, typename TermOp> - mlir::LogicalResult emitOpenACCOpAssociatedStmt( - mlir::Location start, mlir::Location end, OpenACCDirectiveKind dirKind, - SourceLocation dirLoc, llvm::ArrayRef<const OpenACCClause *> clauses, - const Stmt *associatedStmt); + mlir::LogicalResult + emitOpenACCOpAssociatedStmt(mlir::Location start, mlir::Location end, + OpenACCDirectiveKind dirKind, + llvm::ArrayRef<const OpenACCClause *> clauses, + const Stmt *associatedStmt); template <typename Op, typename TermOp> mlir::LogicalResult emitOpenACCOpCombinedConstruct( mlir::Location start, mlir::Location end, OpenACCDirectiveKind dirKind, - SourceLocation dirLoc, llvm::ArrayRef<const OpenACCClause *> clauses, - const Stmt *loopStmt); + llvm::ArrayRef<const OpenACCClause *> clauses, const Stmt *loopStmt); template <typename Op> void emitOpenACCClauses(Op &op, OpenACCDirectiveKind dirKind, - SourceLocation dirLoc, ArrayRef<const OpenACCClause *> clauses); // The second template argument doesn't need to be a template, since it should // always be an mlir::acc::LoopOp, but as this is a template anyway, we make @@ -1927,7 +2046,7 @@ private: // instantiated 3x. 
template <typename ComputeOp, typename LoopOp> void emitOpenACCClauses(ComputeOp &op, LoopOp &loopOp, - OpenACCDirectiveKind dirKind, SourceLocation dirLoc, + OpenACCDirectiveKind dirKind, ArrayRef<const OpenACCClause *> clauses); // The OpenACC LoopOp requires that we have auto, seq, or independent on all diff --git a/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp b/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp index f603f5ec..7e145f2 100644 --- a/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp @@ -123,6 +123,12 @@ public: return true; } + size_t getSrcArgforCopyCtor(const CXXConstructorDecl *, + FunctionArgList &args) const override { + assert(!args.empty() && "expected the arglist to not be empty!"); + return args.size() - 1; + } + void emitBadCastCall(CIRGenFunction &cgf, mlir::Location loc) override; mlir::Value @@ -459,7 +465,8 @@ void CIRGenItaniumCXXABI::emitVTableDefinitions(CIRGenVTables &cgvt, "emitVTableDefinitions: __fundamental_type_info"); } - auto vtableAsGlobalValue = dyn_cast<cir::CIRGlobalValueInterface>(*vtable); + [[maybe_unused]] auto vtableAsGlobalValue = + dyn_cast<cir::CIRGlobalValueInterface>(*vtable); assert(vtableAsGlobalValue && "VTable must support CIRGlobalValueInterface"); // Always emit type metadata on non-available_externally definitions, and on // available_externally definitions if we are performing whole program diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 9f9b2db..eaa9e94 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -675,12 +675,18 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, mlir::Type ty, errorNYI(d->getSourceRange(), "OpenMP target global variable"); gv.setAlignmentAttr(getSize(astContext.getDeclAlign(d))); - assert(!cir::MissingFeatures::opGlobalConstant()); + // FIXME: This code is overly simple and should be merged with other global + // handling. + gv.setConstant(d->getType().isConstantStorage( + astContext, /*ExcludeCtor=*/false, /*ExcludeDtor=*/false)); setLinkageForGV(gv, d); - if (d->getTLSKind()) - errorNYI(d->getSourceRange(), "thread local global variable"); + if (d->getTLSKind()) { + if (d->getTLSKind() == VarDecl::TLS_Dynamic) + errorNYI(d->getSourceRange(), "TLS dynamic"); + setTLSMode(gv, *d); + } setGVProperties(gv, d); @@ -735,12 +741,11 @@ mlir::Value CIRGenModule::getAddrOfGlobalVar(const VarDecl *d, mlir::Type ty, if (!ty) ty = getTypes().convertTypeForMem(astTy); - assert(!cir::MissingFeatures::opGlobalThreadLocal()); - + bool tlsAccess = d->getTLSKind() != VarDecl::TLS_None; cir::GlobalOp g = getOrCreateCIRGlobal(d, ty, isForDefinition); mlir::Type ptrTy = builder.getPointerTo(g.getSymType()); return cir::GetGlobalOp::create(builder, getLoc(d->getSourceRange()), ptrTy, - g.getSymName()); + g.getSymNameAttr(), tlsAccess); } cir::GlobalViewAttr CIRGenModule::getAddrOfGlobalVarAttr(const VarDecl *d) { @@ -864,7 +869,11 @@ void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *vd, if (emitter) emitter->finalize(gv); - assert(!cir::MissingFeatures::opGlobalConstant()); + // If it is safe to mark the global 'constant', do so now. + gv.setConstant((vd->hasAttr<CUDAConstantAttr>() && langOpts.CUDAIsDevice) || + (!needsGlobalCtor && !needsGlobalDtor && + vd->getType().isConstantStorage( + astContext, /*ExcludeCtor=*/true, /*ExcludeDtor=*/true))); assert(!cir::MissingFeatures::opGlobalSection()); // Set CIR's linkage type as appropriate. 
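A hedged illustration of which definitions the constant-marking logic above applies to; the type and variable names are made up, not part of this patch:

struct Logger {
  Logger();
  ~Logger();
  int fd;
};

// 'answer' has constant storage and needs no constructor or destructor, so
// its cir.global can be marked constant. 'logger' is const-qualified but
// requires a runtime constructor and a destructor, so it must remain
// writable and is not flagged constant here.
const int answer = 42;
const Logger logger;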
@@ -876,8 +885,17 @@ void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *vd, // FIXME(cir): setLinkage should likely set MLIR's visibility automatically. gv.setVisibility(getMLIRVisibilityFromCIRLinkage(linkage)); assert(!cir::MissingFeatures::opGlobalDLLImportExport()); - if (linkage == cir::GlobalLinkageKind::CommonLinkage) - errorNYI(initExpr->getSourceRange(), "common linkage"); + if (linkage == cir::GlobalLinkageKind::CommonLinkage) { + // common vars aren't constant even if declared const. + gv.setConstant(false); + // Tentative definition of global variables may be initialized with + // non-zero null pointers. In this case they should have weak linkage + // since common linkage must have zero initializer and must not have + // explicit section therefore cannot have non-zero initial value. + std::optional<mlir::Attribute> initializer = gv.getInitialValue(); + if (initializer && !getBuilder().isNullValue(*initializer)) + gv.setLinkage(cir::GlobalLinkageKind::WeakAnyLinkage); + } setNonAliasAttributes(vd, gv); @@ -1231,10 +1249,8 @@ cir::GlobalLinkageKind CIRGenModule::getCIRLinkageForDeclarator( // linkage. if (!getLangOpts().CPlusPlus && isa<VarDecl>(dd) && !isVarDeclStrongDefinition(astContext, *this, cast<VarDecl>(dd), - getCodeGenOpts().NoCommon)) { - errorNYI(dd->getBeginLoc(), "common linkage", dd->getDeclKindName()); + getCodeGenOpts().NoCommon)) return cir::GlobalLinkageKind::CommonLinkage; - } // selectany symbols are externally visible, so use weak instead of // linkonce. MSVC optimizes away references to const selectany globals, so @@ -1424,6 +1440,23 @@ CIRGenModule::getAddrOfConstantStringFromLiteral(const StringLiteral *s, return builder.getGlobalViewAttr(ptrTy, gv); } +// TODO(cir): this could be a common AST helper for both CIR and LLVM codegen. +LangAS CIRGenModule::getLangTempAllocaAddressSpace() const { + if (getLangOpts().OpenCL) + return LangAS::opencl_private; + + // For temporaries inside functions, CUDA treats them as normal variables. + // LangAS::cuda_device, on the other hand, is reserved for those variables + // explicitly marked with __device__. + if (getLangOpts().CUDAIsDevice) + return LangAS::Default; + + if (getLangOpts().SYCLIsDevice || + (getLangOpts().OpenMP && getLangOpts().OpenMPIsTargetDevice)) + errorNYI("SYCL or OpenMP temp address space"); + return LangAS::Default; +} + void CIRGenModule::emitExplicitCastExprType(const ExplicitCastExpr *e, CIRGenFunction *cgf) { if (cgf && e->getType()->isVariablyModifiedType()) @@ -1433,6 +1466,26 @@ void CIRGenModule::emitExplicitCastExprType(const ExplicitCastExpr *e, "emitExplicitCastExprType"); } +mlir::Value CIRGenModule::emitMemberPointerConstant(const UnaryOperator *e) { + assert(!cir::MissingFeatures::cxxABI()); + + mlir::Location loc = getLoc(e->getSourceRange()); + + const auto *decl = cast<DeclRefExpr>(e->getSubExpr())->getDecl(); + + // A member function pointer. + if (isa<CXXMethodDecl>(decl)) { + errorNYI(e->getSourceRange(), "emitMemberPointerConstant: method pointer"); + return {}; + } + + // Otherwise, a member data pointer. 
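For illustration (not part of this patch; the struct name is made up), the expression this branch constant-folds is a pointer to a non-static data member:

struct Widget {
  int id;
  int weight;
};

// '&Widget::weight' is a UnaryOperator over a DeclRefExpr that names the
// FieldDecl; it folds to a cir.const carrying a data-member attribute that
// records which field is selected, rather than a run-time address.
int Widget::*selected = &Widget::weight;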
+ auto ty = mlir::cast<cir::DataMemberType>(convertType(e->getType())); + const auto *fieldDecl = cast<FieldDecl>(decl); + return cir::ConstantOp::create( + builder, loc, builder.getDataMemberAttr(ty, fieldDecl->getFieldIndex())); +} + void CIRGenModule::emitDeclContext(const DeclContext *dc) { for (Decl *decl : dc->decls()) { // Unlike other DeclContexts, the contents of an ObjCImplDecl at TU scope @@ -1482,10 +1535,10 @@ void CIRGenModule::emitTopLevelDecl(Decl *decl) { break; } case Decl::OpenACCRoutine: - emitGlobalOpenACCDecl(cast<OpenACCRoutineDecl>(decl)); + emitGlobalOpenACCRoutineDecl(cast<OpenACCRoutineDecl>(decl)); break; case Decl::OpenACCDeclare: - emitGlobalOpenACCDecl(cast<OpenACCDeclareDecl>(decl)); + emitGlobalOpenACCDeclareDecl(cast<OpenACCDeclareDecl>(decl)); break; case Decl::Enum: case Decl::Using: // using X; [C++] @@ -1525,10 +1578,14 @@ void CIRGenModule::emitTopLevelDecl(Decl *decl) { break; case Decl::ClassTemplateSpecialization: - case Decl::CXXRecord: + case Decl::CXXRecord: { + CXXRecordDecl *crd = cast<CXXRecordDecl>(decl); assert(!cir::MissingFeatures::generateDebugInfo()); - assert(!cir::MissingFeatures::cxxRecordStaticMembers()); + for (auto *childDecl : crd->decls()) + if (isa<VarDecl, CXXRecordDecl, EnumDecl, OpenACCDeclareDecl>(childDecl)) + emitTopLevelDecl(childDecl); break; + } case Decl::FileScopeAsm: // File-scope asm is ignored during device-side CUDA compilation. @@ -1898,6 +1955,33 @@ void CIRGenModule::setGVPropertiesAux(mlir::Operation *op, assert(!cir::MissingFeatures::opGlobalPartition()); } +cir::TLS_Model CIRGenModule::getDefaultCIRTLSModel() const { + switch (getCodeGenOpts().getDefaultTLSModel()) { + case CodeGenOptions::GeneralDynamicTLSModel: + return cir::TLS_Model::GeneralDynamic; + case CodeGenOptions::LocalDynamicTLSModel: + return cir::TLS_Model::LocalDynamic; + case CodeGenOptions::InitialExecTLSModel: + return cir::TLS_Model::InitialExec; + case CodeGenOptions::LocalExecTLSModel: + return cir::TLS_Model::LocalExec; + } + llvm_unreachable("Invalid TLS model!"); +} + +void CIRGenModule::setTLSMode(mlir::Operation *op, const VarDecl &d) { + assert(d.getTLSKind() && "setting TLS mode on non-TLS var!"); + + cir::TLS_Model tlm = getDefaultCIRTLSModel(); + + // Override the TLS model if it is explicitly specified. + if (d.getAttr<TLSModelAttr>()) + errorNYI(d.getSourceRange(), "TLS model attribute"); + + auto global = cast<cir::GlobalOp>(op); + global.setTlsModel(tlm); +} + void CIRGenModule::setFunctionAttributes(GlobalDecl globalDecl, cir::FuncOp func, bool isIncompleteFunction, @@ -1940,7 +2024,6 @@ void CIRGenModule::setCIRFunctionAttributesForDefinition( existingInlineKind && *existingInlineKind == cir::InlineKind::NoInline; bool isAlwaysInline = existingInlineKind && *existingInlineKind == cir::InlineKind::AlwaysInline; - if (!decl) { assert(!cir::MissingFeatures::hlsl()); @@ -1949,8 +2032,7 @@ void CIRGenModule::setCIRFunctionAttributesForDefinition( // If inlining is disabled and we don't have a declaration to control // inlining, mark the function as 'noinline' unless it is explicitly // marked as 'alwaysinline'. - f.setInlineKindAttr( - cir::InlineAttr::get(&getMLIRContext(), cir::InlineKind::NoInline)); + f.setInlineKind(cir::InlineKind::NoInline); } return; @@ -1967,19 +2049,16 @@ void CIRGenModule::setCIRFunctionAttributesForDefinition( // Handle inline attributes if (decl->hasAttr<NoInlineAttr>() && !isAlwaysInline) { // Add noinline if the function isn't always_inline. 
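The TLS handling added above (setTLSMode together with getDefaultCIRTLSModel) is exercised by thread-local globals; an explicit tls_model attribute still hits errorNYI. A hedged C++ illustration, names made up, not part of this patch:

// Each thread gets its own copy. With no -ftls-model override, the default
// model from CodeGenOptions (general-dynamic unless the driver narrows it)
// is translated into the cir.global's tls_model.
thread_local int per_thread_counter = 0;

int bump() { return ++per_thread_counter; }

// Still NYI in CIR at this point:
//   [[gnu::tls_model("initial-exec")]] thread_local int fast_counter;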
- f.setInlineKindAttr( - cir::InlineAttr::get(&getMLIRContext(), cir::InlineKind::NoInline)); + f.setInlineKind(cir::InlineKind::NoInline); } else if (decl->hasAttr<AlwaysInlineAttr>() && !isNoInline) { // Don't override AlwaysInline with NoInline, or vice versa, since we can't // specify both in IR. - f.setInlineKindAttr( - cir::InlineAttr::get(&getMLIRContext(), cir::InlineKind::AlwaysInline)); + f.setInlineKind(cir::InlineKind::AlwaysInline); } else if (codeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining) { // If inlining is disabled, force everything that isn't always_inline // to carry an explicit noinline attribute. if (!isAlwaysInline) { - f.setInlineKindAttr( - cir::InlineAttr::get(&getMLIRContext(), cir::InlineKind::NoInline)); + f.setInlineKind(cir::InlineKind::NoInline); } } else { // Otherwise, propagate the inline hint attribute and potentially use its @@ -2001,13 +2080,11 @@ void CIRGenModule::setCIRFunctionAttributesForDefinition( return any_of(pattern->redecls(), checkRedeclForInline); }; if (checkForInline(fd)) { - f.setInlineKindAttr(cir::InlineAttr::get(&getMLIRContext(), - cir::InlineKind::InlineHint)); + f.setInlineKind(cir::InlineKind::InlineHint); } else if (codeGenOpts.getInlining() == CodeGenOptions::OnlyHintInlining && !fd->isInlined() && !isAlwaysInline) { - f.setInlineKindAttr( - cir::InlineAttr::get(&getMLIRContext(), cir::InlineKind::NoInline)); + f.setInlineKind(cir::InlineKind::NoInline); } } } @@ -2194,8 +2271,20 @@ CIRGenModule::createCIRFunction(mlir::Location loc, StringRef name, assert(!cir::MissingFeatures::opFuncExtraAttrs()); + // Mark C++ special member functions (Constructor, Destructor etc.) + setCXXSpecialMemberAttr(func, funcDecl); + if (!cgf) theModule.push_back(func); + + if (this->getLangOpts().OpenACC) { + // We only have to handle this attribute, since OpenACCAnnotAttrs are + // handled via the end-of-TU work. 
+ for (const auto *attr : + funcDecl->specific_attrs<OpenACCRoutineDeclAttr>()) + emitOpenACCRoutineDecl(funcDecl, func, attr->getLocation(), + attr->Clauses); + } } return func; } @@ -2209,6 +2298,58 @@ CIRGenModule::createCIRBuiltinFunction(mlir::Location loc, StringRef name, return fnOp; } +static cir::CtorKind getCtorKindFromDecl(const CXXConstructorDecl *ctor) { + if (ctor->isDefaultConstructor()) + return cir::CtorKind::Default; + if (ctor->isCopyConstructor()) + return cir::CtorKind::Copy; + if (ctor->isMoveConstructor()) + return cir::CtorKind::Move; + return cir::CtorKind::Custom; +} + +static cir::AssignKind getAssignKindFromDecl(const CXXMethodDecl *method) { + if (method->isCopyAssignmentOperator()) + return cir::AssignKind::Copy; + if (method->isMoveAssignmentOperator()) + return cir::AssignKind::Move; + llvm_unreachable("not a copy or move assignment operator"); +} + +void CIRGenModule::setCXXSpecialMemberAttr( + cir::FuncOp funcOp, const clang::FunctionDecl *funcDecl) { + if (!funcDecl) + return; + + if (const auto *dtor = dyn_cast<CXXDestructorDecl>(funcDecl)) { + auto cxxDtor = cir::CXXDtorAttr::get( + convertType(getASTContext().getCanonicalTagType(dtor->getParent())), + dtor->isTrivial()); + funcOp.setCxxSpecialMemberAttr(cxxDtor); + return; + } + + if (const auto *ctor = dyn_cast<CXXConstructorDecl>(funcDecl)) { + cir::CtorKind kind = getCtorKindFromDecl(ctor); + auto cxxCtor = cir::CXXCtorAttr::get( + convertType(getASTContext().getCanonicalTagType(ctor->getParent())), + kind, ctor->isTrivial()); + funcOp.setCxxSpecialMemberAttr(cxxCtor); + return; + } + + const auto *method = dyn_cast<CXXMethodDecl>(funcDecl); + if (method && (method->isCopyAssignmentOperator() || + method->isMoveAssignmentOperator())) { + cir::AssignKind assignKind = getAssignKindFromDecl(method); + auto cxxAssign = cir::CXXAssignAttr::get( + convertType(getASTContext().getCanonicalTagType(method->getParent())), + assignKind, method->isTrivial()); + funcOp.setCxxSpecialMemberAttr(cxxAssign); + return; + } +} + cir::FuncOp CIRGenModule::createRuntimeFunction(cir::FuncType ty, StringRef name, mlir::ArrayAttr, [[maybe_unused]] bool isLocal, @@ -2406,3 +2547,39 @@ DiagnosticBuilder CIRGenModule::errorNYI(SourceRange loc, llvm::StringRef feature) { return errorNYI(loc.getBegin(), feature) << loc; } + +void CIRGenModule::mapBlockAddress(cir::BlockAddrInfoAttr blockInfo, + cir::LabelOp label) { + [[maybe_unused]] auto result = + blockAddressInfoToLabel.try_emplace(blockInfo, label); + assert(result.second && + "attempting to map a blockaddress info that is already mapped"); +} + +void CIRGenModule::mapUnresolvedBlockAddress(cir::BlockAddressOp op) { + [[maybe_unused]] auto result = unresolvedBlockAddressToLabel.insert(op); + assert(result.second && + "attempting to map a blockaddress operation that is already mapped"); +} + +void CIRGenModule::mapResolvedBlockAddress(cir::BlockAddressOp op, + cir::LabelOp label) { + [[maybe_unused]] auto result = blockAddressToLabel.try_emplace(op, label); + assert(result.second && + "attempting to map a blockaddress operation that is already mapped"); +} + +void CIRGenModule::updateResolvedBlockAddress(cir::BlockAddressOp op, + cir::LabelOp newLabel) { + auto *it = blockAddressToLabel.find(op); + assert(it != blockAddressToLabel.end() && + "trying to update a blockaddress not previously mapped"); + assert(!it->second && "blockaddress already has a resolved label"); + + it->second = newLabel; +} + +cir::LabelOp +CIRGenModule::lookupBlockAddressInfo(cir::BlockAddrInfoAttr 
blockInfo) { + return blockAddressInfoToLabel.lookup(blockInfo); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h index 186913d..de263f4 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.h +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -126,7 +126,23 @@ public: /// the pointers are supposed to be uniqued, should be fine. Revisit this if /// it ends up taking too much memory. llvm::DenseMap<const clang::FieldDecl *, llvm::StringRef> lambdaFieldToName; - + /// Map BlockAddrInfoAttr (function name, label name) to the corresponding CIR + /// LabelOp. This provides the main lookup table used to resolve block + /// addresses into their label operations. + llvm::DenseMap<cir::BlockAddrInfoAttr, cir::LabelOp> blockAddressInfoToLabel; + /// Map CIR BlockAddressOps directly to their resolved LabelOps. + /// Used once a block address has been successfully lowered to a label. + llvm::MapVector<cir::BlockAddressOp, cir::LabelOp> blockAddressToLabel; + /// Track CIR BlockAddressOps that cannot be resolved immediately + /// because their LabelOp has not yet been emitted. These entries + /// are solved later once the corresponding label is available. + llvm::DenseSet<cir::BlockAddressOp> unresolvedBlockAddressToLabel; + cir::LabelOp lookupBlockAddressInfo(cir::BlockAddrInfoAttr blockInfo); + void mapBlockAddress(cir::BlockAddrInfoAttr blockInfo, cir::LabelOp label); + void mapUnresolvedBlockAddress(cir::BlockAddressOp op); + void mapResolvedBlockAddress(cir::BlockAddressOp op, cir::LabelOp); + void updateResolvedBlockAddress(cir::BlockAddressOp op, + cir::LabelOp newLabel); /// Tell the consumer that this variable has been instantiated. void handleCXXStaticMemberVarInstantiation(VarDecl *vd); @@ -297,6 +313,12 @@ public: getAddrOfConstantStringFromLiteral(const StringLiteral *s, llvm::StringRef name = ".str"); + /// Returns the address space for temporary allocations in the language. This + /// ensures that the allocated variable's address space matches the + /// expectations of the AST, rather than using the target's allocation address + /// space, which may lead to type mismatches in other parts of the IR. + LangAS getLangTempAllocaAddressSpace() const; + /// Set attributes which are common to any form of a global definition (alias, /// Objective-C method, function, global variable). /// @@ -425,6 +447,13 @@ public: void setGVProperties(mlir::Operation *op, const NamedDecl *d) const; void setGVPropertiesAux(mlir::Operation *op, const NamedDecl *d) const; + /// Set TLS mode for the given operation based on the given variable + /// declaration. + void setTLSMode(mlir::Operation *op, const VarDecl &d); + + /// Get TLS mode from CodeGenOptions. + cir::TLS_Model getDefaultCIRTLSModel() const; + /// Set function attributes for a function declaration. void setFunctionAttributes(GlobalDecl gd, cir::FuncOp f, bool isIncompleteFunction, bool isThunk); @@ -447,6 +476,20 @@ public: bool performInit); void emitGlobalOpenACCDecl(const clang::OpenACCConstructDecl *cd); + void emitGlobalOpenACCRoutineDecl(const clang::OpenACCRoutineDecl *cd); + void emitGlobalOpenACCDeclareDecl(const clang::OpenACCDeclareDecl *cd); + template <typename BeforeOpTy, typename DataClauseTy> + void emitGlobalOpenACCDeclareDataOperands(const Expr *varOperand, + DataClauseTy dataClause, + OpenACCModifierKind modifiers, + bool structured, bool implicit, + bool requiresDtor); + // Each of the acc.routine operations must have a unique name, so we just use + // an integer counter. 
This is how Flang does it, so it seems reasonable. + unsigned routineCounter = 0; + void emitOpenACCRoutineDecl(const clang::FunctionDecl *funcDecl, + cir::FuncOp func, SourceLocation pragmaLoc, + ArrayRef<const OpenACCClause *> clauses); // C++ related functions. void emitDeclContext(const DeclContext *dc); @@ -455,6 +498,14 @@ public: /// expression of the given type. mlir::Value emitNullConstant(QualType t, mlir::Location loc); + mlir::TypedAttr emitNullConstantAttr(QualType t); + + /// Return a null constant appropriate for zero-initializing a base class with + /// the given type. This is usually, but not always, an LLVM null constant. + mlir::TypedAttr emitNullConstantForBase(const CXXRecordDecl *record); + + mlir::Value emitMemberPointerConstant(const UnaryOperator *e); + llvm::StringRef getMangledName(clang::GlobalDecl gd); void emitTentativeDefinition(const VarDecl *d); @@ -491,6 +542,10 @@ public: cir::FuncType ty, const clang::FunctionDecl *fd); + /// Mark the function as a special member (e.g. constructor, destructor) + void setCXXSpecialMemberAttr(cir::FuncOp funcOp, + const clang::FunctionDecl *funcDecl); + cir::FuncOp createRuntimeFunction(cir::FuncType ty, llvm::StringRef name, mlir::ArrayAttr = {}, bool isLocal = false, bool assumeConvergent = false); diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp index 527dfd2..8e7384ae 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp @@ -14,6 +14,7 @@ #include "CIRGenCXXABI.h" #include "CIRGenFunction.h" +#include "CIRGenOpenACCHelpers.h" #include "CIRGenOpenACCRecipe.h" #include "clang/AST/ExprCXX.h" @@ -60,9 +61,6 @@ class OpenACCClauseCIREmitter final // This is necessary since a few of the clauses emit differently based on the // directive kind they are attached to. OpenACCDirectiveKind dirKind; - // TODO(cir): This source location should be able to go away once the NYI - // diagnostics are gone. - SourceLocation dirLoc; llvm::SmallVector<mlir::acc::DeviceType> lastDeviceTypeValues; // Keep track of the async-clause so that we can shortcut updating the data @@ -71,10 +69,6 @@ class OpenACCClauseCIREmitter final // Keep track of the data operands so that we can update their async clauses. llvm::SmallVector<mlir::Operation *> dataOperands; - void clauseNotImplemented(const OpenACCClause &c) { - cgf.cgm.errorNYI(c.getSourceRange(), "OpenACC Clause", c.getClauseKind()); - } - void setLastDeviceTypeClause(const OpenACCDeviceTypeClause &clause) { lastDeviceTypeValues.clear(); @@ -118,19 +112,6 @@ class OpenACCClauseCIREmitter final return createConstantInt(cgf.cgm.getLoc(loc), width, value); } - mlir::acc::DeviceType decodeDeviceType(const IdentifierInfo *ii) { - // '*' case leaves no identifier-info, just a nullptr. 
- if (!ii) - return mlir::acc::DeviceType::Star; - return llvm::StringSwitch<mlir::acc::DeviceType>(ii->getName()) - .CaseLower("default", mlir::acc::DeviceType::Default) - .CaseLower("host", mlir::acc::DeviceType::Host) - .CaseLower("multicore", mlir::acc::DeviceType::Multicore) - .CasesLower({"nvidia", "acc_device_nvidia"}, - mlir::acc::DeviceType::Nvidia) - .CaseLower("radeon", mlir::acc::DeviceType::Radeon); - } - mlir::acc::GangArgType decodeGangType(OpenACCGangKind gk) { switch (gk) { case OpenACCGangKind::Num: @@ -149,7 +130,7 @@ class OpenACCClauseCIREmitter final mlir::OpBuilder::InsertionGuard guardCase(builder); builder.setInsertionPoint(operation.loopOp); OpenACCClauseCIREmitter<mlir::acc::LoopOp> loopEmitter{ - operation.loopOp, recipeInsertLocation, cgf, builder, dirKind, dirLoc}; + operation.loopOp, recipeInsertLocation, cgf, builder, dirKind}; loopEmitter.lastDeviceTypeValues = lastDeviceTypeValues; loopEmitter.Visit(&c); } @@ -160,12 +141,7 @@ class OpenACCClauseCIREmitter final mlir::OpBuilder::InsertionGuard guardCase(builder); builder.setInsertionPoint(operation.computeOp); OpenACCClauseCIREmitter<typename OpTy::ComputeOpTy> computeEmitter{ - operation.computeOp, - recipeInsertLocation, - cgf, - builder, - dirKind, - dirLoc}; + operation.computeOp, recipeInsertLocation, cgf, builder, dirKind}; computeEmitter.lastDeviceTypeValues = lastDeviceTypeValues; @@ -182,33 +158,6 @@ class OpenACCClauseCIREmitter final dataOperands.append(computeEmitter.dataOperands); } - mlir::acc::DataClauseModifier - convertModifiers(OpenACCModifierKind modifiers) { - using namespace mlir::acc; - static_assert(static_cast<int>(OpenACCModifierKind::Zero) == - static_cast<int>(DataClauseModifier::zero) && - static_cast<int>(OpenACCModifierKind::Readonly) == - static_cast<int>(DataClauseModifier::readonly) && - static_cast<int>(OpenACCModifierKind::AlwaysIn) == - static_cast<int>(DataClauseModifier::alwaysin) && - static_cast<int>(OpenACCModifierKind::AlwaysOut) == - static_cast<int>(DataClauseModifier::alwaysout) && - static_cast<int>(OpenACCModifierKind::Capture) == - static_cast<int>(DataClauseModifier::capture)); - - DataClauseModifier mlirModifiers{}; - - // The MLIR representation of this represents `always` as `alwaysin` + - // `alwaysout`. So do a small fixup here. - if (isOpenACCModifierBitSet(modifiers, OpenACCModifierKind::Always)) { - mlirModifiers = mlirModifiers | DataClauseModifier::always; - modifiers &= ~OpenACCModifierKind::Always; - } - - mlirModifiers = mlirModifiers | static_cast<DataClauseModifier>(modifiers); - return mlirModifiers; - } - template <typename BeforeOpTy, typename AfterOpTy> void addDataOperand(const Expr *varOperand, mlir::acc::DataClause dataClause, OpenACCModifierKind modifiers, bool structured, @@ -243,8 +192,8 @@ class OpenACCClauseCIREmitter final // Set the 'rest' of the info for both operations. beforeOp.setDataClause(dataClause); afterOp.setDataClause(dataClause); - beforeOp.setModifiers(convertModifiers(modifiers)); - afterOp.setModifiers(convertModifiers(modifiers)); + beforeOp.setModifiers(convertOpenACCModifiers(modifiers)); + afterOp.setModifiers(convertOpenACCModifiers(modifiers)); // Make sure we record these, so 'async' values can be updated later. dataOperands.push_back(beforeOp.getOperation()); @@ -264,7 +213,7 @@ class OpenACCClauseCIREmitter final // Set the 'rest' of the info for the operation. 
beforeOp.setDataClause(dataClause); - beforeOp.setModifiers(convertModifiers(modifiers)); + beforeOp.setModifiers(convertOpenACCModifiers(modifiers)); // Make sure we record these, so 'async' values can be updated later. dataOperands.push_back(beforeOp.getOperation()); @@ -368,12 +317,12 @@ public: mlir::OpBuilder::InsertPoint &recipeInsertLocation, CIRGen::CIRGenFunction &cgf, CIRGen::CIRGenBuilderTy &builder, - OpenACCDirectiveKind dirKind, SourceLocation dirLoc) + OpenACCDirectiveKind dirKind) : operation(operation), recipeInsertLocation(recipeInsertLocation), - cgf(cgf), builder(builder), dirKind(dirKind), dirLoc(dirLoc) {} + cgf(cgf), builder(builder), dirKind(dirKind) {} void VisitClause(const OpenACCClause &clause) { - clauseNotImplemented(clause); + llvm_unreachable("Unknown/unhandled clause kind"); } // The entry point for the CIR emitter. All users should use this rather than @@ -432,9 +381,7 @@ public: // Nothing to do here either, combined constructs are just going to use // 'lastDeviceTypeValues' to set the value for the child visitor. } else { - // TODO: When we've implemented this for everything, switch this to an - // unreachable. routine construct remains. - return clauseNotImplemented(clause); + llvm_unreachable("Unknown construct kind in VisitDeviceTypeClause"); } } @@ -498,9 +445,7 @@ public: } else if constexpr (isCombinedType<OpTy>) { applyToComputeOp(clause); } else { - // TODO: When we've implemented this for everything, switch this to an - // unreachable. Combined constructs remain. update construct remains. - return clauseNotImplemented(clause); + llvm_unreachable("Unknown construct kind in VisitAsyncClause"); } } @@ -627,7 +572,7 @@ public: } else { // TODO: When we've implemented this for everything, switch this to an // unreachable. update construct remains. - return clauseNotImplemented(clause); + llvm_unreachable("Unknown construct kind in VisitWaitClause"); } } @@ -646,9 +591,7 @@ public: } else if constexpr (isCombinedType<OpTy>) { applyToLoopOp(clause); } else { - // TODO: When we've implemented this for everything, switch this to an - // unreachable. Routine construct remains. - return clauseNotImplemented(clause); + llvm_unreachable("Unknown construct kind in VisitSeqClause"); } } @@ -658,9 +601,7 @@ public: } else if constexpr (isCombinedType<OpTy>) { applyToLoopOp(clause); } else { - // TODO: When we've implemented this for everything, switch this to an - // unreachable. Routine, construct remains. - return clauseNotImplemented(clause); + llvm_unreachable("Unknown construct kind in VisitAutoClause"); } } @@ -670,9 +611,7 @@ public: } else if constexpr (isCombinedType<OpTy>) { applyToLoopOp(clause); } else { - // TODO: When we've implemented this for everything, switch this to an - // unreachable. Routine construct remains. - return clauseNotImplemented(clause); + llvm_unreachable("Unknown construct kind in VisitIndependentClause"); } } @@ -732,9 +671,7 @@ public: } else if constexpr (isCombinedType<OpTy>) { applyToLoopOp(clause); } else { - // TODO: When we've implemented this for everything, switch this to an - // unreachable. Combined constructs remain. - return clauseNotImplemented(clause); + llvm_unreachable("Unknown construct kind in VisitWorkerClause"); } } @@ -750,9 +687,7 @@ public: } else if constexpr (isCombinedType<OpTy>) { applyToLoopOp(clause); } else { - // TODO: When we've implemented this for everything, switch this to an - // unreachable. Combined constructs remain. 
- return clauseNotImplemented(clause); + llvm_unreachable("Unknown construct kind in VisitVectorClause"); } } @@ -800,12 +735,16 @@ public: var, mlir::acc::DataClause::acc_copy, clause.getModifierList(), /*structured=*/true, /*implicit=*/false); + } else if constexpr (isOneOfTypes<OpTy, mlir::acc::DeclareEnterOp>) { + for (const Expr *var : clause.getVarList()) + addDataOperand<mlir::acc::CopyinOp>( + var, mlir::acc::DataClause::acc_copy, clause.getModifierList(), + /*structured=*/true, + /*implicit=*/false); } else if constexpr (isCombinedType<OpTy>) { applyToComputeOp(clause); } else { - // TODO: When we've implemented this for everything, switch this to an - // unreachable. declare construct remains. - return clauseNotImplemented(clause); + llvm_unreachable("Unknown construct kind in VisitCopyClause"); } } @@ -822,12 +761,16 @@ public: addDataOperand<mlir::acc::CopyinOp>( var, mlir::acc::DataClause::acc_copyin, clause.getModifierList(), /*structured=*/false, /*implicit=*/false); + } else if constexpr (isOneOfTypes<OpTy, mlir::acc::DeclareEnterOp>) { + for (const Expr *var : clause.getVarList()) + addDataOperand<mlir::acc::CopyinOp>( + var, mlir::acc::DataClause::acc_copyin, clause.getModifierList(), + /*structured=*/true, + /*implicit=*/false); } else if constexpr (isCombinedType<OpTy>) { applyToComputeOp(clause); } else { - // TODO: When we've implemented this for everything, switch this to an - // unreachable. declare construct remains. - return clauseNotImplemented(clause); + llvm_unreachable("Unknown construct kind in VisitCopyInClause"); } } @@ -845,12 +788,16 @@ public: var, mlir::acc::DataClause::acc_copyout, clause.getModifierList(), /*structured=*/false, /*implicit=*/false); + } else if constexpr (isOneOfTypes<OpTy, mlir::acc::DeclareEnterOp>) { + for (const Expr *var : clause.getVarList()) + addDataOperand<mlir::acc::CreateOp>( + var, mlir::acc::DataClause::acc_copyout, clause.getModifierList(), + /*structured=*/true, + /*implicit=*/false); } else if constexpr (isCombinedType<OpTy>) { applyToComputeOp(clause); } else { - // TODO: When we've implemented this for everything, switch this to an - // unreachable. declare construct remains. - return clauseNotImplemented(clause); + llvm_unreachable("Unknown construct kind in VisitCopyOutClause"); } } @@ -867,12 +814,28 @@ public: addDataOperand<mlir::acc::CreateOp>( var, mlir::acc::DataClause::acc_create, clause.getModifierList(), /*structured=*/false, /*implicit=*/false); + } else if constexpr (isOneOfTypes<OpTy, mlir::acc::DeclareEnterOp>) { + for (const Expr *var : clause.getVarList()) + addDataOperand<mlir::acc::CreateOp>( + var, mlir::acc::DataClause::acc_create, clause.getModifierList(), + /*structured=*/true, + /*implicit=*/false); } else if constexpr (isCombinedType<OpTy>) { applyToComputeOp(clause); } else { - // TODO: When we've implemented this for everything, switch this to an - // unreachable. declare construct remains. 
- return clauseNotImplemented(clause); + llvm_unreachable("Unknown construct kind in VisitCreateClause"); + } + } + + void VisitLinkClause(const OpenACCLinkClause &clause) { + if constexpr (isOneOfTypes<OpTy, mlir::acc::DeclareEnterOp>) { + for (const Expr *var : clause.getVarList()) + addDataOperand<mlir::acc::DeclareLinkOp>( + var, mlir::acc::DataClause::acc_declare_link, {}, + /*structured=*/true, + /*implicit=*/false); + } else { + llvm_unreachable("Unknown construct kind in VisitLinkClause"); } } @@ -921,7 +884,8 @@ public: void VisitDevicePtrClause(const OpenACCDevicePtrClause &clause) { if constexpr (isOneOfTypes<OpTy, mlir::acc::ParallelOp, mlir::acc::SerialOp, - mlir::acc::KernelsOp, mlir::acc::DataOp>) { + mlir::acc::KernelsOp, mlir::acc::DataOp, + mlir::acc::DeclareEnterOp>) { for (const Expr *var : clause.getVarList()) addDataOperand<mlir::acc::DevicePtrOp>( var, mlir::acc::DataClause::acc_deviceptr, {}, @@ -930,9 +894,7 @@ public: } else if constexpr (isCombinedType<OpTy>) { applyToComputeOp(clause); } else { - // TODO: When we've implemented this for everything, switch this to an - // unreachable. declare remains. - return clauseNotImplemented(clause); + llvm_unreachable("Unknown construct kind in VisitDevicePtrClause"); } } @@ -957,12 +919,16 @@ public: addDataOperand<mlir::acc::PresentOp, mlir::acc::DeleteOp>( var, mlir::acc::DataClause::acc_present, {}, /*structured=*/true, /*implicit=*/false); + } else if constexpr (isOneOfTypes<OpTy, mlir::acc::DeclareEnterOp>) { + for (const Expr *var : clause.getVarList()) + addDataOperand<mlir::acc::PresentOp>( + var, mlir::acc::DataClause::acc_present, {}, + /*structured=*/true, + /*implicit=*/false); } else if constexpr (isCombinedType<OpTy>) { applyToComputeOp(clause); } else { - // TODO: When we've implemented this for everything, switch this to an - // unreachable. declare remains. - return clauseNotImplemented(clause); + llvm_unreachable("Unknown construct kind in VisitPresentClause"); } } @@ -1104,6 +1070,18 @@ public: llvm_unreachable("Unknown construct kind in VisitReductionClause"); } } + + void VisitDeviceResidentClause(const OpenACCDeviceResidentClause &clause) { + if constexpr (isOneOfTypes<OpTy, mlir::acc::DeclareEnterOp>) { + for (const Expr *var : clause.getVarList()) + addDataOperand<mlir::acc::DeclareDeviceResidentOp>( + var, mlir::acc::DataClause::acc_declare_device_resident, {}, + /*structured=*/true, + /*implicit=*/false); + } else { + llvm_unreachable("Unknown construct kind in VisitDeviceResidentClause"); + } + } }; template <typename OpTy> @@ -1111,29 +1089,28 @@ auto makeClauseEmitter(OpTy &op, mlir::OpBuilder::InsertPoint &recipeInsertLocation, CIRGen::CIRGenFunction &cgf, CIRGen::CIRGenBuilderTy &builder, - OpenACCDirectiveKind dirKind, SourceLocation dirLoc) { + OpenACCDirectiveKind dirKind) { return OpenACCClauseCIREmitter<OpTy>(op, recipeInsertLocation, cgf, builder, - dirKind, dirLoc); + dirKind); } } // namespace template <typename Op> void CIRGenFunction::emitOpenACCClauses( - Op &op, OpenACCDirectiveKind dirKind, SourceLocation dirLoc, + Op &op, OpenACCDirectiveKind dirKind, ArrayRef<const OpenACCClause *> clauses) { mlir::OpBuilder::InsertionGuard guardCase(builder); // Sets insertion point before the 'op', since every new expression needs to // be before the operation. 
builder.setInsertionPoint(op); - makeClauseEmitter(op, lastRecipeLocation, *this, builder, dirKind, dirLoc) + makeClauseEmitter(op, lastRecipeLocation, *this, builder, dirKind) .emitClauses(clauses); } #define EXPL_SPEC(N) \ template void CIRGenFunction::emitOpenACCClauses<N>( \ - N &, OpenACCDirectiveKind, SourceLocation, \ - ArrayRef<const OpenACCClause *>); + N &, OpenACCDirectiveKind, ArrayRef<const OpenACCClause *>); EXPL_SPEC(mlir::acc::ParallelOp) EXPL_SPEC(mlir::acc::SerialOp) EXPL_SPEC(mlir::acc::KernelsOp) @@ -1151,25 +1128,26 @@ EXPL_SPEC(mlir::acc::AtomicReadOp) EXPL_SPEC(mlir::acc::AtomicWriteOp) EXPL_SPEC(mlir::acc::AtomicCaptureOp) EXPL_SPEC(mlir::acc::AtomicUpdateOp) +EXPL_SPEC(mlir::acc::DeclareEnterOp) #undef EXPL_SPEC template <typename ComputeOp, typename LoopOp> void CIRGenFunction::emitOpenACCClauses( ComputeOp &op, LoopOp &loopOp, OpenACCDirectiveKind dirKind, - SourceLocation dirLoc, ArrayRef<const OpenACCClause *> clauses) { + ArrayRef<const OpenACCClause *> clauses) { static_assert(std::is_same_v<mlir::acc::LoopOp, LoopOp>); CombinedConstructClauseInfo<ComputeOp> inf{op, loopOp}; // We cannot set the insertion point here and do so in the emitter, but make // sure we reset it with the 'guard' anyway. mlir::OpBuilder::InsertionGuard guardCase(builder); - makeClauseEmitter(inf, lastRecipeLocation, *this, builder, dirKind, dirLoc) + makeClauseEmitter(inf, lastRecipeLocation, *this, builder, dirKind) .emitClauses(clauses); } #define EXPL_SPEC(N) \ template void CIRGenFunction::emitOpenACCClauses<N, mlir::acc::LoopOp>( \ - N &, mlir::acc::LoopOp &, OpenACCDirectiveKind, SourceLocation, \ + N &, mlir::acc::LoopOp &, OpenACCDirectiveKind, \ ArrayRef<const OpenACCClause *>); EXPL_SPEC(mlir::acc::ParallelOp) diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCHelpers.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCHelpers.h new file mode 100644 index 0000000..639d148 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCHelpers.h @@ -0,0 +1,56 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains helpers for OpenACC emission that don't need to be in +// CIRGenModule, but can't live in a single .cpp file. +// +//===----------------------------------------------------------------------===// +#include "mlir/Dialect/OpenACC/OpenACC.h" +#include "clang/AST/DeclOpenACC.h" + +namespace clang::CIRGen { +inline mlir::acc::DataClauseModifier +convertOpenACCModifiers(OpenACCModifierKind modifiers) { + using namespace mlir::acc; + static_assert(static_cast<int>(OpenACCModifierKind::Zero) == + static_cast<int>(DataClauseModifier::zero) && + static_cast<int>(OpenACCModifierKind::Readonly) == + static_cast<int>(DataClauseModifier::readonly) && + static_cast<int>(OpenACCModifierKind::AlwaysIn) == + static_cast<int>(DataClauseModifier::alwaysin) && + static_cast<int>(OpenACCModifierKind::AlwaysOut) == + static_cast<int>(DataClauseModifier::alwaysout) && + static_cast<int>(OpenACCModifierKind::Capture) == + static_cast<int>(DataClauseModifier::capture)); + + DataClauseModifier mlirModifiers{}; + + // The MLIR representation of this represents `always` as `alwaysin` + + // `alwaysout`. So do a small fixup here. 
+ if (isOpenACCModifierBitSet(modifiers, OpenACCModifierKind::Always)) { + mlirModifiers = mlirModifiers | DataClauseModifier::always; + modifiers &= ~OpenACCModifierKind::Always; + } + + mlirModifiers = mlirModifiers | static_cast<DataClauseModifier>(modifiers); + return mlirModifiers; +} + +inline mlir::acc::DeviceType decodeDeviceType(const IdentifierInfo *ii) { + // '*' case leaves no identifier-info, just a nullptr. + if (!ii) + return mlir::acc::DeviceType::Star; + return llvm::StringSwitch<mlir::acc::DeviceType>(ii->getName()) + .CaseLower("default", mlir::acc::DeviceType::Default) + .CaseLower("host", mlir::acc::DeviceType::Host) + .CaseLower("multicore", mlir::acc::DeviceType::Multicore) + .CasesLower({"nvidia", "acc_device_nvidia"}, + mlir::acc::DeviceType::Nvidia) + .CaseLower("radeon", mlir::acc::DeviceType::Radeon); +} +} // namespace clang::CIRGen diff --git a/clang/lib/CIR/CodeGen/CIRGenPointerAuth.cpp b/clang/lib/CIR/CodeGen/CIRGenPointerAuth.cpp new file mode 100644 index 0000000..20b0646 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenPointerAuth.cpp @@ -0,0 +1,23 @@ +//===--- CIRGenPointerAuth.cpp - CIR generation for ptr auth --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains common routines relating to the emission of +// pointer authentication operations. +// +//===----------------------------------------------------------------------===// + +#include "CIRGenFunction.h" + +using namespace clang; +using namespace clang::CIRGen; + +Address CIRGenFunction::getAsNaturalAddressOf(Address addr, + QualType pointeeTy) { + assert(!cir::MissingFeatures::pointerAuthentication()); + return addr; +} diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h index bf0ddc5..c936497 100644 --- a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h +++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h @@ -184,6 +184,11 @@ public: return fieldIdxMap.lookup(fd); } + unsigned getNonVirtualBaseCIRFieldNo(const CXXRecordDecl *rd) const { + assert(nonVirtualBases.count(rd) && "Invalid non-virtual base!"); + return nonVirtualBases.lookup(rd); + } + /// Check whether this struct can be C++ zero-initialized /// with a zeroinitializer. 
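As a rough illustration of the decodeDeviceType helper added above (not part of the diff; the spellings are exactly the ones handled by its StringSwitch), common device_type arguments map as follows:
// Illustration only: '*' reaches decodeDeviceType as a null IdentifierInfo.
//   #pragma acc parallel device_type(*)                 -> mlir::acc::DeviceType::Star
//   #pragma acc parallel device_type(default)           -> mlir::acc::DeviceType::Default
//   #pragma acc parallel device_type(host)              -> mlir::acc::DeviceType::Host
//   #pragma acc parallel device_type(multicore)         -> mlir::acc::DeviceType::Multicore
//   #pragma acc parallel device_type(nvidia)            -> mlir::acc::DeviceType::Nvidia
//   #pragma acc parallel device_type(acc_device_nvidia) -> mlir::acc::DeviceType::Nvidia
//   #pragma acc parallel device_type(radeon)            -> mlir::acc::DeviceType::Radeon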
bool isZeroInitializable() const { return zeroInitializable; } diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp index 1eb7199..f13e7cb 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp @@ -66,7 +66,7 @@ static mlir::LogicalResult emitStmtWithResult(CIRGenFunction &cgf, mlir::LogicalResult CIRGenFunction::emitCompoundStmtWithoutScope( const CompoundStmt &s, Address *lastValue, AggValueSlot slot) { mlir::LogicalResult result = mlir::success(); - const Stmt *exprResult = s.getStmtExprResult(); + const Stmt *exprResult = s.body_back(); assert((!lastValue || (lastValue && exprResult)) && "If lastValue is not null then the CompoundStmt must have a " "StmtExprResult"); @@ -159,6 +159,10 @@ mlir::LogicalResult CIRGenFunction::emitStmt(const Stmt *s, return emitCXXTryStmt(cast<CXXTryStmt>(*s)); case Stmt::CXXForRangeStmtClass: return emitCXXForRangeStmt(cast<CXXForRangeStmt>(*s), attr); + case Stmt::CoroutineBodyStmtClass: + return emitCoroutineBody(cast<CoroutineBodyStmt>(*s)); + case Stmt::IndirectGotoStmtClass: + return emitIndirectGotoStmt(cast<IndirectGotoStmt>(*s)); case Stmt::OpenACCComputeConstructClass: return emitOpenACCComputeConstruct(cast<OpenACCComputeConstruct>(*s)); case Stmt::OpenACCLoopConstructClass: @@ -199,10 +203,7 @@ mlir::LogicalResult CIRGenFunction::emitStmt(const Stmt *s, case Stmt::CaseStmtClass: case Stmt::SEHLeaveStmtClass: case Stmt::SYCLKernelCallStmtClass: - case Stmt::CoroutineBodyStmtClass: - return emitCoroutineBody(cast<CoroutineBodyStmt>(*s)); case Stmt::CoreturnStmtClass: - case Stmt::IndirectGotoStmtClass: case Stmt::OMPParallelDirectiveClass: case Stmt::OMPTaskwaitDirectiveClass: case Stmt::OMPTaskyieldDirectiveClass: @@ -458,7 +459,14 @@ mlir::LogicalResult CIRGenFunction::emitReturnStmt(const ReturnStmt &s) { if (getContext().getLangOpts().ElideConstructors && s.getNRVOCandidate() && s.getNRVOCandidate()->isNRVOVariable()) { assert(!cir::MissingFeatures::openMP()); - assert(!cir::MissingFeatures::nrvo()); + // Apply the named return value optimization for this return statement, + // which means doing nothing: the appropriate result has already been + // constructed into the NRVO variable. + + // If there is an NRVO flag for this variable, set it to 1 to indicate + // that the cleanup code should not destroy the variable. + if (auto nrvoFlag = nrvoFlags[s.getNRVOCandidate()]) + builder.createFlagStore(loc, true, nrvoFlag); } else if (!rv) { // No return expression. Do nothing.
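The emitReturnStmt hunk above wires up the NRVO flag store; a minimal sketch of the C++ pattern it applies to (illustration only, not part of the diff; Widget is a made-up type):
// Illustration only: with conditional returns, the NRVO variable gets a flag;
// the new code stores true to it at the return so cleanup skips the destructor.
struct Widget { Widget(); ~Widget(); };
Widget make(bool flip) {
  Widget w; // constructed directly into the return slot (NRVO)
  if (flip)
    return w; // createFlagStore(loc, true, nrvoFlag) is emitted here
  return w;
}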
} else if (rv->getType()->isVoidType()) { @@ -556,6 +564,17 @@ mlir::LogicalResult CIRGenFunction::emitGotoStmt(const clang::GotoStmt &s) { } mlir::LogicalResult +CIRGenFunction::emitIndirectGotoStmt(const IndirectGotoStmt &s) { + mlir::Value val = emitScalarExpr(s.getTarget()); + assert(indirectGotoBlock && + "indirect goto block should already be emitted"); + cir::BrOp::create(builder, getLoc(s.getSourceRange()), indirectGotoBlock, + val); + builder.createBlock(builder.getBlock()->getParent()); + return mlir::success(); +} + +mlir::LogicalResult CIRGenFunction::emitContinueStmt(const clang::ContinueStmt &s) { builder.createContinue(getLoc(s.getKwLoc())); @@ -581,9 +600,14 @@ mlir::LogicalResult CIRGenFunction::emitLabel(const clang::LabelDecl &d) { } builder.setInsertionPointToEnd(labelBlock); - cir::LabelOp::create(builder, getLoc(d.getSourceRange()), d.getName()); + cir::LabelOp label = + cir::LabelOp::create(builder, getLoc(d.getSourceRange()), d.getName()); builder.setInsertionPointToEnd(labelBlock); - + auto func = cast<cir::FuncOp>(curFn); + cgm.mapBlockAddress(cir::BlockAddrInfoAttr::get(builder.getContext(), + func.getSymNameAttr(), + label.getLabelAttr()), + label); // FIXME: emit debug info for labels, incrementProfileCounter assert(!cir::MissingFeatures::ehstackBranches()); assert(!cir::MissingFeatures::incrementProfileCounter()); diff --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp index 9e55bd5..11aad17 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp @@ -24,15 +24,14 @@ using namespace mlir::acc; template <typename Op, typename TermOp> mlir::LogicalResult CIRGenFunction::emitOpenACCOpAssociatedStmt( mlir::Location start, mlir::Location end, OpenACCDirectiveKind dirKind, - SourceLocation dirLoc, llvm::ArrayRef<const OpenACCClause *> clauses, - const Stmt *associatedStmt) { + llvm::ArrayRef<const OpenACCClause *> clauses, const Stmt *associatedStmt) { mlir::LogicalResult res = mlir::success(); llvm::SmallVector<mlir::Type> retTy; llvm::SmallVector<mlir::Value> operands; auto op = Op::create(builder, start, retTy, operands); - emitOpenACCClauses(op, dirKind, dirLoc, clauses); + emitOpenACCClauses(op, dirKind, clauses); { mlir::Block &block = op.getRegion().emplaceBlock(); @@ -66,8 +65,7 @@ template <> struct CombinedType<KernelsOp> { template <typename Op, typename TermOp> mlir::LogicalResult CIRGenFunction::emitOpenACCOpCombinedConstruct( mlir::Location start, mlir::Location end, OpenACCDirectiveKind dirKind, - SourceLocation dirLoc, llvm::ArrayRef<const OpenACCClause *> clauses, - const Stmt *loopStmt) { + llvm::ArrayRef<const OpenACCClause *> clauses, const Stmt *loopStmt) { mlir::LogicalResult res = mlir::success(); llvm::SmallVector<mlir::Type> retTy; @@ -102,7 +100,7 @@ mlir::LogicalResult CIRGenFunction::emitOpenACCOpCombinedConstruct( mlir::acc::YieldOp::create(builder, end); } - emitOpenACCClauses(computeOp, loopOp, dirKind, dirLoc, clauses); + emitOpenACCClauses(computeOp, loopOp, dirKind, clauses); updateLoopOpParallelism(loopOp, /*isOrphan=*/false, dirKind); @@ -114,13 +112,13 @@ mlir::LogicalResult CIRGenFunction::emitOpenACCOpCombinedConstruct( template <typename Op> Op CIRGenFunction::emitOpenACCOp( - mlir::Location start, OpenACCDirectiveKind dirKind, SourceLocation dirLoc, + mlir::Location start, OpenACCDirectiveKind dirKind, llvm::ArrayRef<const OpenACCClause *> clauses) { llvm::SmallVector<mlir::Type> retTy;
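For the emitIndirectGotoStmt support added in the CIRGenStmt.cpp hunk above, a minimal sketch of the GNU computed-goto source it lowers (illustration only, not part of the diff):
// Illustration only: 'goto *expr' now emits a cir.br that carries the target
// value into the function's shared indirect-goto block; '&&label' relies on
// the block-address mapping registered in emitLabel above.
void dispatch(int i) {
  static void *targets[] = {&&handle_a, &&handle_b};
  goto *targets[i];
handle_a:
  return;
handle_b:
  return;
}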
llvm::SmallVector<mlir::Value> operands; auto op = Op::create(builder, start, retTy, operands); - emitOpenACCClauses(op, dirKind, dirLoc, clauses); + emitOpenACCClauses(op, dirKind, clauses); return op; } @@ -132,16 +130,13 @@ CIRGenFunction::emitOpenACCComputeConstruct(const OpenACCComputeConstruct &s) { switch (s.getDirectiveKind()) { case OpenACCDirectiveKind::Parallel: return emitOpenACCOpAssociatedStmt<ParallelOp, mlir::acc::YieldOp>( - start, end, s.getDirectiveKind(), s.getDirectiveLoc(), s.clauses(), - s.getStructuredBlock()); + start, end, s.getDirectiveKind(), s.clauses(), s.getStructuredBlock()); case OpenACCDirectiveKind::Serial: return emitOpenACCOpAssociatedStmt<SerialOp, mlir::acc::YieldOp>( - start, end, s.getDirectiveKind(), s.getDirectiveLoc(), s.clauses(), - s.getStructuredBlock()); + start, end, s.getDirectiveKind(), s.clauses(), s.getStructuredBlock()); case OpenACCDirectiveKind::Kernels: return emitOpenACCOpAssociatedStmt<KernelsOp, mlir::acc::TerminatorOp>( - start, end, s.getDirectiveKind(), s.getDirectiveLoc(), s.clauses(), - s.getStructuredBlock()); + start, end, s.getDirectiveKind(), s.clauses(), s.getStructuredBlock()); default: llvm_unreachable("invalid compute construct kind"); } @@ -153,39 +148,34 @@ CIRGenFunction::emitOpenACCDataConstruct(const OpenACCDataConstruct &s) { mlir::Location end = getLoc(s.getSourceRange().getEnd()); return emitOpenACCOpAssociatedStmt<DataOp, mlir::acc::TerminatorOp>( - start, end, s.getDirectiveKind(), s.getDirectiveLoc(), s.clauses(), - s.getStructuredBlock()); + start, end, s.getDirectiveKind(), s.clauses(), s.getStructuredBlock()); } mlir::LogicalResult CIRGenFunction::emitOpenACCInitConstruct(const OpenACCInitConstruct &s) { mlir::Location start = getLoc(s.getSourceRange().getBegin()); - emitOpenACCOp<InitOp>(start, s.getDirectiveKind(), s.getDirectiveLoc(), - s.clauses()); + emitOpenACCOp<InitOp>(start, s.getDirectiveKind(), s.clauses()); return mlir::success(); } mlir::LogicalResult CIRGenFunction::emitOpenACCSetConstruct(const OpenACCSetConstruct &s) { mlir::Location start = getLoc(s.getSourceRange().getBegin()); - emitOpenACCOp<SetOp>(start, s.getDirectiveKind(), s.getDirectiveLoc(), - s.clauses()); + emitOpenACCOp<SetOp>(start, s.getDirectiveKind(), s.clauses()); return mlir::success(); } mlir::LogicalResult CIRGenFunction::emitOpenACCShutdownConstruct( const OpenACCShutdownConstruct &s) { mlir::Location start = getLoc(s.getSourceRange().getBegin()); - emitOpenACCOp<ShutdownOp>(start, s.getDirectiveKind(), - s.getDirectiveLoc(), s.clauses()); + emitOpenACCOp<ShutdownOp>(start, s.getDirectiveKind(), s.clauses()); return mlir::success(); } mlir::LogicalResult CIRGenFunction::emitOpenACCWaitConstruct(const OpenACCWaitConstruct &s) { mlir::Location start = getLoc(s.getSourceRange().getBegin()); - auto waitOp = emitOpenACCOp<WaitOp>(start, s.getDirectiveKind(), - s.getDirectiveLoc(), s.clauses()); + auto waitOp = emitOpenACCOp<WaitOp>(start, s.getDirectiveKind(), s.clauses()); auto createIntExpr = [this](const Expr *intExpr) { mlir::Value expr = emitScalarExpr(intExpr); @@ -225,16 +215,13 @@ mlir::LogicalResult CIRGenFunction::emitOpenACCCombinedConstruct( switch (s.getDirectiveKind()) { case OpenACCDirectiveKind::ParallelLoop: return emitOpenACCOpCombinedConstruct<ParallelOp, mlir::acc::YieldOp>( - start, end, s.getDirectiveKind(), s.getDirectiveLoc(), s.clauses(), - s.getLoop()); + start, end, s.getDirectiveKind(), s.clauses(), s.getLoop()); case OpenACCDirectiveKind::SerialLoop: return 
emitOpenACCOpCombinedConstruct<SerialOp, mlir::acc::YieldOp>( - start, end, s.getDirectiveKind(), s.getDirectiveLoc(), s.clauses(), - s.getLoop()); + start, end, s.getDirectiveKind(), s.clauses(), s.getLoop()); case OpenACCDirectiveKind::KernelsLoop: return emitOpenACCOpCombinedConstruct<KernelsOp, mlir::acc::TerminatorOp>( - start, end, s.getDirectiveKind(), s.getDirectiveLoc(), s.clauses(), - s.getLoop()); + start, end, s.getDirectiveKind(), s.clauses(), s.getLoop()); default: llvm_unreachable("invalid compute construct kind"); } @@ -246,31 +233,27 @@ mlir::LogicalResult CIRGenFunction::emitOpenACCHostDataConstruct( mlir::Location end = getLoc(s.getSourceRange().getEnd()); return emitOpenACCOpAssociatedStmt<HostDataOp, mlir::acc::TerminatorOp>( - start, end, s.getDirectiveKind(), s.getDirectiveLoc(), s.clauses(), - s.getStructuredBlock()); + start, end, s.getDirectiveKind(), s.clauses(), s.getStructuredBlock()); } mlir::LogicalResult CIRGenFunction::emitOpenACCEnterDataConstruct( const OpenACCEnterDataConstruct &s) { mlir::Location start = getLoc(s.getSourceRange().getBegin()); - emitOpenACCOp<EnterDataOp>(start, s.getDirectiveKind(), s.getDirectiveLoc(), - s.clauses()); + emitOpenACCOp<EnterDataOp>(start, s.getDirectiveKind(), s.clauses()); return mlir::success(); } mlir::LogicalResult CIRGenFunction::emitOpenACCExitDataConstruct( const OpenACCExitDataConstruct &s) { mlir::Location start = getLoc(s.getSourceRange().getBegin()); - emitOpenACCOp<ExitDataOp>(start, s.getDirectiveKind(), s.getDirectiveLoc(), - s.clauses()); + emitOpenACCOp<ExitDataOp>(start, s.getDirectiveKind(), s.clauses()); return mlir::success(); } mlir::LogicalResult CIRGenFunction::emitOpenACCUpdateConstruct(const OpenACCUpdateConstruct &s) { mlir::Location start = getLoc(s.getSourceRange().getBegin()); - emitOpenACCOp<UpdateOp>(start, s.getDirectiveKind(), s.getDirectiveLoc(), - s.clauses()); + emitOpenACCOp<UpdateOp>(start, s.getDirectiveKind(), s.clauses()); return mlir::success(); } @@ -314,15 +297,80 @@ const VarDecl *getLValueDecl(const Expr *e) { return cast<VarDecl>(dre->getDecl()); } -mlir::LogicalResult -CIRGenFunction::emitOpenACCAtomicConstruct(const OpenACCAtomicConstruct &s) { - // For now, we are only support 'read'/'write'/'update', so diagnose. We can - // switch on the kind later once we implement the 'capture' form. - if (s.getAtomicKind() == OpenACCAtomicKind::Capture) { - cgm.errorNYI(s.getSourceRange(), "OpenACC Atomic Construct"); - return mlir::failure(); +static mlir::acc::AtomicReadOp +emitAtomicRead(CIRGenFunction &cgf, CIRGenBuilderTy &builder, + mlir::Location start, + const OpenACCAtomicConstruct::SingleStmtInfo &inf) { + // Atomic 'read' only permits 'v = x', where v and x are both scalar L + // values. The getAssociatedStmtInfo strips off implicit casts, which + // includes implicit conversions and L-to-R-Value conversions, so we can + // just emit it as an L value. The Flang implementation has no problem with + // different types, so it appears that the dialect can handle the + // conversions. 
+ mlir::Value v = cgf.emitLValue(inf.V).getPointer(); + mlir::Value x = cgf.emitLValue(inf.X).getPointer(); + mlir::Type resTy = cgf.convertType(inf.V->getType()); + return mlir::acc::AtomicReadOp::create(builder, start, x, v, resTy, + /*ifCond=*/{}); +} + +static mlir::acc::AtomicWriteOp +emitAtomicWrite(CIRGenFunction &cgf, CIRGenBuilderTy &builder, + mlir::Location start, + const OpenACCAtomicConstruct::SingleStmtInfo &inf) { + mlir::Value x = cgf.emitLValue(inf.X).getPointer(); + mlir::Value expr = cgf.emitAnyExpr(inf.RefExpr).getValue(); + return mlir::acc::AtomicWriteOp::create(builder, start, x, expr, + /*ifCond=*/{}); +} + +static std::pair<mlir::LogicalResult, mlir::acc::AtomicUpdateOp> +emitAtomicUpdate(CIRGenFunction &cgf, CIRGenBuilderTy &builder, + mlir::Location start, mlir::Location end, + const OpenACCAtomicConstruct::SingleStmtInfo &inf) { + mlir::Value x = cgf.emitLValue(inf.X).getPointer(); + auto op = mlir::acc::AtomicUpdateOp::create(builder, start, x, /*ifCond=*/{}); + + mlir::LogicalResult res = mlir::success(); + { + mlir::OpBuilder::InsertionGuard guardCase(builder); + mlir::Type argTy = cast<cir::PointerType>(x.getType()).getPointee(); + std::array<mlir::Type, 1> recipeType{argTy}; + std::array<mlir::Location, 1> recipeLoc{start}; + auto *recipeBlock = builder.createBlock( + &op.getRegion(), op.getRegion().end(), recipeType, recipeLoc); + builder.setInsertionPointToEnd(recipeBlock); + // Since we have an initial value that we know is a scalar type, we can + // just emit the entire statement here after sneaking-in our 'alloca' in + // the right place, then loading out of it. Flang does a lot less work + // (probably does its own emitting!), but we have more complicated AST + // nodes to worry about, so we can just count on opt to remove the extra + // alloca/load/store set. + auto alloca = cir::AllocaOp::create( + builder, start, x.getType(), argTy, "x_var", + cgf.cgm.getSize( + cgf.getContext().getTypeAlignInChars(inf.X->getType()))); + + alloca.setInitAttr(builder.getUnitAttr()); + builder.CIRBaseBuilderTy::createStore(start, recipeBlock->getArgument(0), + alloca); + + const VarDecl *xval = getLValueDecl(inf.X); + CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, xval}; + cgf.replaceAddrOfLocalVar( + xval, Address{alloca, argTy, cgf.getContext().getDeclAlign(xval)}); + + res = cgf.emitStmt(inf.WholeExpr, /*useCurrentScope=*/true); + + auto load = cir::LoadOp::create(builder, start, {alloca}); + mlir::acc::YieldOp::create(builder, end, {load}); } + return {res, op}; +} + +mlir::LogicalResult +CIRGenFunction::emitOpenACCAtomicConstruct(const OpenACCAtomicConstruct &s) { // While Atomic is an 'associated statement' construct, it 'steals' the // expression it is associated with rather than emitting it inside of it. So // it has custom emit logic. @@ -331,78 +379,85 @@ CIRGenFunction::emitOpenACCAtomicConstruct(const OpenACCAtomicConstruct &s) { OpenACCAtomicConstruct::StmtInfo inf = s.getAssociatedStmtInfo(); switch (s.getAtomicKind()) { - case OpenACCAtomicKind::Capture: - llvm_unreachable("Unimplemented atomic construct type, should have " - "diagnosed/returned above"); - return mlir::failure(); case OpenACCAtomicKind::Read: { - - // Atomic 'read' only permits 'v = x', where v and x are both scalar L - // values. The getAssociatedStmtInfo strips off implicit casts, which - // includes implicit conversions and L-to-R-Value conversions, so we can - // just emit it as an L value. 
The Flang implementation has no problem with - // different types, so it appears that the dialect can handle the - // conversions. - mlir::Value v = emitLValue(inf.V).getPointer(); - mlir::Value x = emitLValue(inf.X).getPointer(); - mlir::Type resTy = convertType(inf.V->getType()); - auto op = mlir::acc::AtomicReadOp::create(builder, start, x, v, resTy, - /*ifCond=*/{}); - emitOpenACCClauses(op, s.getDirectiveKind(), s.getDirectiveLoc(), - s.clauses()); + assert(inf.Form == OpenACCAtomicConstruct::StmtInfo::StmtForm::Read); + mlir::acc::AtomicReadOp op = + emitAtomicRead(*this, builder, start, inf.First); + emitOpenACCClauses(op, s.getDirectiveKind(), s.clauses()); return mlir::success(); } case OpenACCAtomicKind::Write: { - mlir::Value x = emitLValue(inf.X).getPointer(); - mlir::Value expr = emitAnyExpr(inf.RefExpr).getValue(); - auto op = mlir::acc::AtomicWriteOp::create(builder, start, x, expr, - /*ifCond=*/{}); - emitOpenACCClauses(op, s.getDirectiveKind(), s.getDirectiveLoc(), - s.clauses()); + assert(inf.Form == OpenACCAtomicConstruct::StmtInfo::StmtForm::Write); + auto op = emitAtomicWrite(*this, builder, start, inf.First); + emitOpenACCClauses(op, s.getDirectiveKind(), s.clauses()); return mlir::success(); } case OpenACCAtomicKind::None: case OpenACCAtomicKind::Update: { - mlir::Value x = emitLValue(inf.X).getPointer(); - auto op = - mlir::acc::AtomicUpdateOp::create(builder, start, x, /*ifCond=*/{}); - emitOpenACCClauses(op, s.getDirectiveKind(), s.getDirectiveLoc(), - s.clauses()); + assert(inf.Form == OpenACCAtomicConstruct::StmtInfo::StmtForm::Update); + auto [res, op] = emitAtomicUpdate(*this, builder, start, end, inf.First); + emitOpenACCClauses(op, s.getDirectiveKind(), s.clauses()); + return res; + } + case OpenACCAtomicKind::Capture: { + // Atomic-capture is made up of two statements, either an update = read, + // read + update, or read + write. As a result, the IR represents the + // capture region as having those two 'inside' of it. + auto op = mlir::acc::AtomicCaptureOp::create(builder, start, /*ifCond=*/{}); + emitOpenACCClauses(op, s.getDirectiveKind(), s.clauses()); mlir::LogicalResult res = mlir::success(); { mlir::OpBuilder::InsertionGuard guardCase(builder); - mlir::Type argTy = cast<cir::PointerType>(x.getType()).getPointee(); - std::array<mlir::Type, 1> recipeType{argTy}; - std::array<mlir::Location, 1> recipeLoc{start}; - mlir::Block *recipeBlock = builder.createBlock( - &op.getRegion(), op.getRegion().end(), recipeType, recipeLoc); - builder.setInsertionPointToEnd(recipeBlock); - - // Since we have an initial value that we know is a scalar type, we can - // just emit the entire statement here after sneaking-in our 'alloca' in - // the right place, then loading out of it. Flang does a lot less work - // (probably does its own emitting!), but we have more complicated AST - // nodes to worry about, so we can just count on opt to remove the extra - // alloca/load/store set. 
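The atomic 'capture' lowering in this hunk distinguishes three statement forms (read+write, read+update, update+read); a sketch of the corresponding OpenACC source (illustration only, not part of the diff):
// Illustration only: the structured-block forms the StmtInfo::StmtForm cases model.
void captures(int &v, int &x, int expr) {
#pragma acc atomic capture // read then write
  { v = x; x = expr; }
#pragma acc atomic capture // read then update
  { v = x; x += expr; }
#pragma acc atomic capture // update then read
  { x += expr; v = x; }
}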
- auto alloca = cir::AllocaOp::create( - builder, start, x.getType(), argTy, "x_var", - cgm.getSize(getContext().getTypeAlignInChars(inf.X->getType()))); - - alloca.setInitAttr(mlir::UnitAttr::get(&getMLIRContext())); - builder.CIRBaseBuilderTy::createStore(start, recipeBlock->getArgument(0), - alloca); - - const VarDecl *xval = getLValueDecl(inf.X); - CIRGenFunction::DeclMapRevertingRAII declMapRAII{*this, xval}; - replaceAddrOfLocalVar( - xval, Address{alloca, argTy, getContext().getDeclAlign(xval)}); - - res = emitStmt(s.getAssociatedStmt(), /*useCurrentScope=*/true); - - auto load = cir::LoadOp::create(builder, start, {alloca}); - mlir::acc::YieldOp::create(builder, end, {load}); - } + mlir::Block *block = + builder.createBlock(&op.getRegion(), op.getRegion().end(), {}, {}); + + builder.setInsertionPointToStart(block); + + auto terminator = mlir::acc::TerminatorOp::create(builder, end); + + // The AtomicCaptureOp only permits the two acc.atomic.* operations inside + // of it, so all other parts of the expression need to be emitted before + // the AtomicCaptureOp, then moved into place. + builder.setInsertionPoint(op); + + switch (inf.Form) { + default: + llvm_unreachable("invalid form for Capture"); + case OpenACCAtomicConstruct::StmtInfo::StmtForm::ReadWrite: { + mlir::acc::AtomicReadOp first = + emitAtomicRead(*this, builder, start, inf.First); + mlir::acc::AtomicWriteOp second = + emitAtomicWrite(*this, builder, start, inf.Second); + + first->moveBefore(terminator); + second->moveBefore(terminator); + break; + } + case OpenACCAtomicConstruct::StmtInfo::StmtForm::ReadUpdate: { + mlir::acc::AtomicReadOp first = + emitAtomicRead(*this, builder, start, inf.First); + auto [this_res, second] = + emitAtomicUpdate(*this, builder, start, end, inf.Second); + res = this_res; + + first->moveBefore(terminator); + second->moveBefore(terminator); + break; + } + case OpenACCAtomicConstruct::StmtInfo::StmtForm::UpdateRead: { + auto [this_res, first] = + emitAtomicUpdate(*this, builder, start, end, inf.First); + res = this_res; + mlir::acc::AtomicReadOp second = + emitAtomicRead(*this, builder, start, inf.Second); + + first->moveBefore(terminator); + second->moveBefore(terminator); + break; + } + } + } return res; } } diff --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp index c5b89bd..406b1a5 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp @@ -117,8 +117,7 @@ CIRGenFunction::emitOpenACCLoopConstruct(const OpenACCLoopConstruct &s) { // // Emit all clauses. 
- emitOpenACCClauses(op, s.getDirectiveKind(), s.getDirectiveLoc(), - s.clauses()); + emitOpenACCClauses(op, s.getDirectiveKind(), s.clauses()); updateLoopOpParallelism(op, s.isOrphanedLoopConstruct(), s.getParentComputeConstructKind()); diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp index 03618d4..7f000ec 100644 --- a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp @@ -97,7 +97,8 @@ std::string CIRGenTypes::getRecordTypeName(const clang::RecordDecl *recordDecl, llvm::raw_svector_ostream outStream(typeName); PrintingPolicy policy = recordDecl->getASTContext().getPrintingPolicy(); - policy.SuppressInlineNamespace = false; + policy.SuppressInlineNamespace = + llvm::to_underlying(PrintingPolicy::SuppressInlineNamespaceMode::None); policy.AlwaysIncludeTypeForTemplateArgument = true; policy.PrintAsCanonical = true; policy.SuppressTagKeyword = true; @@ -404,7 +405,7 @@ mlir::Type CIRGenTypes::convertType(QualType type) { const ReferenceType *refTy = cast<ReferenceType>(ty); QualType elemTy = refTy->getPointeeType(); auto pointeeType = convertTypeForMem(elemTy); - resultType = builder.getPointerTo(pointeeType); + resultType = builder.getPointerTo(pointeeType, elemTy.getAddressSpace()); assert(resultType && "Cannot get pointer type?"); break; } @@ -481,6 +482,21 @@ mlir::Type CIRGenTypes::convertType(QualType type) { break; } + case Type::MemberPointer: { + const auto *mpt = cast<MemberPointerType>(ty); + + mlir::Type memberTy = convertType(mpt->getPointeeType()); + auto clsTy = mlir::cast<cir::RecordType>( + convertType(QualType(mpt->getQualifier().getAsType(), 0))); + if (mpt->isMemberDataPointer()) { + resultType = cir::DataMemberType::get(memberTy, clsTy); + } else { + assert(!cir::MissingFeatures::methodType()); + cgm.errorNYI(SourceLocation(), "MethodType"); + } + break; + } + case Type::FunctionNoProto: case Type::FunctionProto: resultType = convertFunctionTypeInternal(type); diff --git a/clang/lib/CIR/CodeGen/CIRGenValue.h b/clang/lib/CIR/CodeGen/CIRGenValue.h index ab245a77..2002bd7 100644 --- a/clang/lib/CIR/CodeGen/CIRGenValue.h +++ b/clang/lib/CIR/CodeGen/CIRGenValue.h @@ -49,6 +49,7 @@ public: bool isScalar() const { return flavor == Scalar; } bool isComplex() const { return flavor == Complex; } bool isAggregate() const { return flavor == Aggregate; } + bool isIgnored() const { return isScalar() && !getValue(); } bool isVolatileQualified() const { return isVolatile; } @@ -166,7 +167,8 @@ class LValue { // this is the alignment of the whole vector) unsigned alignment; mlir::Value v; - mlir::Value vectorIdx; // Index for vector subscript + mlir::Value vectorIdx; // Index for vector subscript + mlir::Attribute vectorElts; // ExtVector element subset: V.xyx mlir::Type elementType; LValueBaseInfo baseInfo; const CIRGenBitFieldInfo *bitFieldInfo{nullptr}; @@ -190,6 +192,7 @@ public: bool isSimple() const { return lvType == Simple; } bool isVectorElt() const { return lvType == VectorElt; } bool isBitField() const { return lvType == BitField; } + bool isExtVectorElt() const { return lvType == ExtVectorElt; } bool isGlobalReg() const { return lvType == GlobalReg; } bool isVolatile() const { return quals.hasVolatile(); } @@ -254,6 +257,22 @@ public: return vectorIdx; } + // extended vector elements. 
+ Address getExtVectorAddress() const { + assert(isExtVectorElt()); + return Address(getExtVectorPointer(), elementType, getAlignment()); + } + + mlir::Value getExtVectorPointer() const { + assert(isExtVectorElt()); + return v; + } + + mlir::ArrayAttr getExtVectorElts() const { + assert(isExtVectorElt()); + return mlir::cast<mlir::ArrayAttr>(vectorElts); + } + static LValue makeVectorElt(Address vecAddress, mlir::Value index, clang::QualType t, LValueBaseInfo baseInfo) { LValue r; @@ -265,6 +284,19 @@ public: return r; } + static LValue makeExtVectorElt(Address vecAddress, mlir::ArrayAttr elts, + clang::QualType type, + LValueBaseInfo baseInfo) { + LValue r; + r.lvType = ExtVectorElt; + r.v = vecAddress.getPointer(); + r.elementType = vecAddress.getElementType(); + r.vectorElts = elts; + r.initialize(type, type.getQualifiers(), vecAddress.getAlignment(), + baseInfo); + return r; + } + // bitfield lvalue Address getBitFieldAddress() const { return Address(getBitFieldPointer(), elementType, getAlignment()); diff --git a/clang/lib/CIR/CodeGen/CIRGenerator.cpp b/clang/lib/CIR/CodeGen/CIRGenerator.cpp index aa4d9eb..0208eee 100644 --- a/clang/lib/CIR/CodeGen/CIRGenerator.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenerator.cpp @@ -166,6 +166,18 @@ void CIRGenerator::HandleCXXStaticMemberVarInstantiation(VarDecl *D) { cgm->handleCXXStaticMemberVarInstantiation(D); } +void CIRGenerator::HandleOpenACCRoutineReference(const FunctionDecl *FD, + const OpenACCRoutineDecl *RD) { + llvm::StringRef mangledName = cgm->getMangledName(FD); + cir::FuncOp entry = + mlir::dyn_cast_if_present<cir::FuncOp>(cgm->getGlobalValue(mangledName)); + + // if this wasn't generated, don't force it to be. + if (!entry) + return; + cgm->emitOpenACCRoutineDecl(FD, entry, RD->getBeginLoc(), RD->clauses()); +} + void CIRGenerator::CompleteTentativeDefinition(VarDecl *d) { if (diags.hasErrorOccurred()) return; diff --git a/clang/lib/CIR/CodeGen/CMakeLists.txt b/clang/lib/CIR/CodeGen/CMakeLists.txt index 7c31bea..d6cd150 100644 --- a/clang/lib/CIR/CodeGen/CMakeLists.txt +++ b/clang/lib/CIR/CodeGen/CMakeLists.txt @@ -12,6 +12,7 @@ add_clang_library(clangCIR CIRGenAtomic.cpp CIRGenBuilder.cpp CIRGenBuiltin.cpp + CIRGenBuiltinAArch64.cpp CIRGenBuiltinX86.cpp CIRGenCall.cpp CIRGenClass.cpp @@ -35,6 +36,7 @@ add_clang_library(clangCIR CIRGenOpenACC.cpp CIRGenOpenACCClause.cpp CIRGenOpenACCRecipe.cpp + CIRGenPointerAuth.cpp CIRGenRecordLayoutBuilder.cpp CIRGenStmt.cpp CIRGenStmtOpenACC.cpp diff --git a/clang/lib/CIR/CodeGen/EHScopeStack.h b/clang/lib/CIR/CodeGen/EHScopeStack.h index 4198c23..2ebe6df 100644 --- a/clang/lib/CIR/CodeGen/EHScopeStack.h +++ b/clang/lib/CIR/CodeGen/EHScopeStack.h @@ -127,13 +127,43 @@ public: virtual ~Cleanup() = default; + /// Generation flags. + class Flags { + enum { + F_IsForEH = 0x1, + F_IsNormalCleanupKind = 0x2, + F_IsEHCleanupKind = 0x4, + F_HasExitSwitch = 0x8, + }; + unsigned flags = 0; + + public: + Flags() = default; + + /// isForEH - true if the current emission is for an EH cleanup. + bool isForEHCleanup() const { return flags & F_IsForEH; } + bool isForNormalCleanup() const { return !isForEHCleanup(); } + void setIsForEHCleanup() { flags |= F_IsForEH; } + + bool isNormalCleanupKind() const { return flags & F_IsNormalCleanupKind; } + void setIsNormalCleanupKind() { flags |= F_IsNormalCleanupKind; } + + /// isEHCleanupKind - true if the cleanup was pushed as an EH + /// cleanup. 
+ bool isEHCleanupKind() const { return flags & F_IsEHCleanupKind; } + void setIsEHCleanupKind() { flags |= F_IsEHCleanupKind; } + + bool hasExitSwitch() const { return flags & F_HasExitSwitch; } + void setHasExitSwitch() { flags |= F_HasExitSwitch; } + }; + /// Emit the cleanup. For normal cleanups, this is run in the /// same EH context as when the cleanup was pushed, i.e. the /// immediately-enclosing context of the cleanup scope. For /// EH cleanups, this is run in a terminate context. /// // \param flags cleanup kind. - virtual void emit(CIRGenFunction &cgf) = 0; + virtual void emit(CIRGenFunction &cgf, Flags flags) = 0; }; private: @@ -155,6 +185,9 @@ private: /// The innermost normal cleanup on the stack. stable_iterator innermostNormalCleanup = stable_end(); + /// The innermost EH scope on the stack. + stable_iterator innermostEHScope = stable_end(); + /// The CGF this Stack belong to CIRGenFunction *cgf = nullptr; @@ -214,6 +247,8 @@ public: /// Determines whether the exception-scopes stack is empty. bool empty() const { return startOfData == endOfBuffer; } + bool requiresCatchOrCleanup() const; + /// Determines whether there are any normal cleanups on the stack. bool hasNormalCleanups() const { return innermostNormalCleanup != stable_end(); @@ -226,6 +261,8 @@ public: } stable_iterator getInnermostActiveNormalCleanup() const; + stable_iterator getInnermostEHScope() const { return innermostEHScope; } + /// An unstable reference to a scope-stack depth. Invalidated by /// pushes but not pops. class iterator; @@ -233,6 +270,9 @@ public: /// Returns an iterator pointing to the innermost EH scope. iterator begin() const; + /// Returns an iterator pointing to the outermost EH scope. + iterator end() const; + /// Create a stable reference to the top of the EH stack. The /// returned reference is valid until that scope is popped off the /// stack. diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp b/clang/lib/CIR/CodeGen/TargetInfo.cpp index 62a8c59..377c532 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.cpp +++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp @@ -1,5 +1,8 @@ #include "TargetInfo.h" #include "ABIInfo.h" +#include "CIRGenFunction.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" using namespace clang; using namespace clang::CIRGen; @@ -68,3 +71,14 @@ bool TargetCIRGenInfo::isNoProtoCallVariadic( // For everything else, we just prefer false unless we opt out. return false; } + +mlir::Value TargetCIRGenInfo::performAddrSpaceCast( + CIRGenFunction &cgf, mlir::Value v, cir::TargetAddressSpaceAttr srcAddr, + mlir::Type destTy, bool isNonNull) const { + // Since target may map different address spaces in AST to the same address + // space, an address space conversion may end up as a bitcast. + if (cir::GlobalOp globalOp = v.getDefiningOp<cir::GlobalOp>()) + cgf.cgm.errorNYI("Global op addrspace cast"); + // Try to preserve the source's name to make IR more readable. 
+ return cgf.getBuilder().createAddrSpaceCast(v, destTy); +} diff --git a/clang/lib/CIR/CodeGen/TargetInfo.h b/clang/lib/CIR/CodeGen/TargetInfo.h index dbb0312..7268264 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.h +++ b/clang/lib/CIR/CodeGen/TargetInfo.h @@ -17,6 +17,7 @@ #include "ABIInfo.h" #include "CIRGenTypes.h" #include "clang/Basic/AddressSpaces.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" #include <memory> #include <utility> @@ -33,6 +34,8 @@ bool isEmptyFieldForLayout(const ASTContext &context, const FieldDecl *fd); /// if the [[no_unique_address]] attribute would have made them empty. bool isEmptyRecordForLayout(const ASTContext &context, QualType t); +class CIRGenFunction; + class TargetCIRGenInfo { std::unique_ptr<ABIInfo> info; @@ -48,6 +51,15 @@ public: virtual cir::TargetAddressSpaceAttr getCIRAllocaAddressSpace() const { return {}; } + /// Perform an address space cast of an expression of pointer type. + /// \param V is the value to be cast to another address space. + /// \param DestTy is the destination pointer type. + /// \param srcAddr is the address space of \p V. + /// \param IsNonNull is the flag indicating \p V is known to be non-null. + virtual mlir::Value performAddrSpaceCast(CIRGenFunction &cgf, mlir::Value v, + cir::TargetAddressSpaceAttr srcAddr, + mlir::Type destTy, + bool isNonNull = false) const; /// Determine whether a call to an unprototyped functions under /// the given calling convention should use the variadic
