diff options
Diffstat (limited to 'clang/lib')
42 files changed, 629 insertions, 168 deletions
diff --git a/clang/lib/AST/ASTStructuralEquivalence.cpp b/clang/lib/AST/ASTStructuralEquivalence.cpp index 0f2762d..22bb4cb 100644 --- a/clang/lib/AST/ASTStructuralEquivalence.cpp +++ b/clang/lib/AST/ASTStructuralEquivalence.cpp @@ -456,7 +456,9 @@ CheckStructurallyEquivalentAttributes(StructuralEquivalenceContext &Context, const Decl *D1, const Decl *D2, const Decl *PrimaryDecl = nullptr) { // If either declaration has an attribute on it, we treat the declarations - // as not being structurally equivalent. + // as not being structurally equivalent unless both declarations are implicit + // (ones generated by the compiler like __NSConstantString_tag). + // // FIXME: this should be handled on a case-by-case basis via tablegen in // Attr.td. There are multiple cases to consider: one declaration with the // attribute, another without it; different attribute syntax|spellings for @@ -468,7 +470,7 @@ CheckStructurallyEquivalentAttributes(StructuralEquivalenceContext &Context, D1Attr = *D1->getAttrs().begin(); if (D2->hasAttrs()) D2Attr = *D2->getAttrs().begin(); - if (D1Attr || D2Attr) { + if ((D1Attr || D2Attr) && !D1->isImplicit() && !D2->isImplicit()) { const auto *DiagnoseDecl = cast<TypeDecl>(PrimaryDecl ? PrimaryDecl : D2); Context.Diag2(DiagnoseDecl->getLocation(), diag::warn_odr_tag_type_with_attributes) @@ -870,7 +872,27 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, else if (T1->getTypeClass() == Type::FunctionNoProto && T2->getTypeClass() == Type::FunctionProto) TC = Type::FunctionNoProto; - else + else if (Context.LangOpts.C23 && !Context.StrictTypeSpelling && + (T1->getTypeClass() == Type::Enum || + T2->getTypeClass() == Type::Enum)) { + // In C23, if not being strict about token equivalence, we need to handle + // the case where one type is an enumeration and the other type is an + // integral type. + // + // C23 6.7.3.3p16: The enumerated type is compatible with the underlying + // type of the enumeration. + // + // Treat the enumeration as its underlying type and use the builtin type + // class comparison. + if (T1->getTypeClass() == Type::Enum) { + T1 = T1->getAs<EnumType>()->getDecl()->getIntegerType(); + assert(T2->isBuiltinType() && !T1.isNull()); // Sanity check + } else if (T2->getTypeClass() == Type::Enum) { + T2 = T2->getAs<EnumType>()->getDecl()->getIntegerType(); + assert(T1->isBuiltinType() && !T2.isNull()); // Sanity check + } + TC = Type::Builtin; + } else return false; } @@ -2071,6 +2093,48 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, !CheckStructurallyEquivalentAttributes(Context, D1, D2)) return false; + // In C23, if one enumeration has a fixed underlying type, the other shall + // have a compatible fixed underlying type (6.2.7). + if (Context.LangOpts.C23) { + if (D1->isFixed() != D2->isFixed()) { + if (Context.Complain) { + Context.Diag2(D2->getLocation(), + Context.getApplicableDiagnostic( + diag::err_odr_tag_type_inconsistent)) + << Context.ToCtx.getTypeDeclType(D2) + << (&Context.FromCtx != &Context.ToCtx); + Context.Diag1(D1->getLocation(), + D1->isFixed() + ? diag::note_odr_fixed_underlying_type + : diag::note_odr_missing_fixed_underlying_type) + << D1; + Context.Diag2(D2->getLocation(), + D2->isFixed() + ? diag::note_odr_fixed_underlying_type + : diag::note_odr_missing_fixed_underlying_type) + << D2; + } + return false; + } + if (D1->isFixed()) { + assert(D2->isFixed() && "enums expected to have fixed underlying types"); + if (!IsStructurallyEquivalent(Context, D1->getIntegerType(), + D2->getIntegerType())) { + if (Context.Complain) { + Context.Diag2(D2->getLocation(), + Context.getApplicableDiagnostic( + diag::err_odr_tag_type_inconsistent)) + << Context.ToCtx.getTypeDeclType(D2) + << (&Context.FromCtx != &Context.ToCtx); + Context.Diag2(D2->getLocation(), + diag::note_odr_incompatible_fixed_underlying_type) + << D2 << D2->getIntegerType() << D1->getIntegerType(); + } + return false; + } + } + } + llvm::SmallVector<const EnumConstantDecl *, 8> D1Enums, D2Enums; auto CopyEnumerators = [](auto &&Range, llvm::SmallVectorImpl<const EnumConstantDecl *> &Cont) { diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index 9012442..2602ed7 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -481,13 +481,11 @@ inline bool Mulc(InterpState &S, CodePtr OpPC) { Floating RA = S.allocFloat(A.getSemantics()); RA.copy(ResR); Result.elem<Floating>(0) = RA; // Floating(ResR); - Result.atIndex(0).initialize(); Floating RI = S.allocFloat(A.getSemantics()); RI.copy(ResI); Result.elem<Floating>(1) = RI; // Floating(ResI); - Result.atIndex(1).initialize(); - Result.initialize(); + Result.initializeAllElements(); } else { // Integer element type. const T &LHSR = LHS.elem<T>(0); @@ -505,7 +503,6 @@ inline bool Mulc(InterpState &S, CodePtr OpPC) { return false; if (T::sub(A, B, Bits, &Result.elem<T>(0))) return false; - Result.atIndex(0).initialize(); // imag(Result) = (real(LHS) * imag(RHS)) + (imag(LHS) * real(RHS)) if (T::mul(LHSR, RHSI, Bits, &A)) @@ -514,8 +511,8 @@ inline bool Mulc(InterpState &S, CodePtr OpPC) { return false; if (T::add(A, B, Bits, &Result.elem<T>(1))) return false; - Result.atIndex(1).initialize(); Result.initialize(); + Result.initializeAllElements(); } return true; @@ -541,14 +538,12 @@ inline bool Divc(InterpState &S, CodePtr OpPC) { Floating RA = S.allocFloat(A.getSemantics()); RA.copy(ResR); Result.elem<Floating>(0) = RA; // Floating(ResR); - Result.atIndex(0).initialize(); Floating RI = S.allocFloat(A.getSemantics()); RI.copy(ResI); Result.elem<Floating>(1) = RI; // Floating(ResI); - Result.atIndex(1).initialize(); - Result.initialize(); + Result.initializeAllElements(); } else { // Integer element type. const T &LHSR = LHS.elem<T>(0); @@ -590,7 +585,6 @@ inline bool Divc(InterpState &S, CodePtr OpPC) { return false; if (T::div(ResultR, Den, Bits, &ResultR)) return false; - Result.atIndex(0).initialize(); // imag(Result) = ((imag(LHS) * real(RHS)) - (real(LHS) * imag(RHS))) / Den if (T::mul(LHSI, RHSR, Bits, &A) || T::mul(LHSR, RHSI, Bits, &B)) @@ -599,8 +593,7 @@ inline bool Divc(InterpState &S, CodePtr OpPC) { return false; if (T::div(ResultI, Den, Bits, &ResultI)) return false; - Result.atIndex(1).initialize(); - Result.initialize(); + Result.initializeAllElements(); } return true; diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 3ece7054..f908d02 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -1099,10 +1099,8 @@ static bool interp__builtin_complex(InterpState &S, CodePtr OpPC, Pointer &Result = S.Stk.peek<Pointer>(); Result.elem<Floating>(0) = Arg1; - Result.atIndex(0).initialize(); Result.elem<Floating>(1) = Arg2; - Result.atIndex(1).initialize(); - Result.initialize(); + Result.initializeAllElements(); return true; } @@ -1728,9 +1726,9 @@ static bool interp__builtin_elementwise_popcount(InterpState &S, CodePtr OpPC, Dst.elem<T>(I) = T::from(Arg.elem<T>(I).toAPSInt().reverseBits().getZExtValue()); } - Dst.atIndex(I).initialize(); }); } + Dst.initializeAllElements(); return true; } @@ -2314,12 +2312,10 @@ static bool interp__builtin_elementwise_sat(InterpState &S, CodePtr OpPC, llvm_unreachable("Wrong builtin ID"); } - INT_TYPE_SWITCH_NO_BOOL(ElemT, { - const Pointer &E = Dst.atIndex(I); - E.deref<T>() = static_cast<T>(Result); - E.initialize(); - }); + INT_TYPE_SWITCH_NO_BOOL(ElemT, + { Dst.elem<T>(I) = static_cast<T>(Result); }); } + Dst.initializeAllElements(); return true; } diff --git a/clang/lib/AST/ByteCode/Pointer.cpp b/clang/lib/AST/ByteCode/Pointer.cpp index 9341bc1..4753a4e1 100644 --- a/clang/lib/AST/ByteCode/Pointer.cpp +++ b/clang/lib/AST/ByteCode/Pointer.cpp @@ -495,6 +495,19 @@ void Pointer::initialize() const { getInlineDesc()->IsInitialized = true; } +void Pointer::initializeAllElements() const { + assert(getFieldDesc()->isPrimitiveArray()); + assert(isArrayRoot()); + + InitMapPtr &IM = getInitMap(); + if (!IM) { + IM = std::make_pair(true, nullptr); + } else { + IM->first = true; + IM->second.reset(); + } +} + void Pointer::activate() const { // Field has its bit in an inline descriptor. assert(PointeeStorage.BS.Base != 0 && diff --git a/clang/lib/AST/ByteCode/Pointer.h b/clang/lib/AST/ByteCode/Pointer.h index 059f176..9fe224c 100644 --- a/clang/lib/AST/ByteCode/Pointer.h +++ b/clang/lib/AST/ByteCode/Pointer.h @@ -725,6 +725,10 @@ public: /// Initializes a field. void initialize() const; + /// Initialize all elements of a primitive array at once. This can be + /// used in situations where we *know* we have initialized *all* elements + /// of a primtive array. + void initializeAllElements() const; /// Activats a field. void activate() const; /// Deactivates an entire strurcutre. diff --git a/clang/lib/AST/RecordLayoutBuilder.cpp b/clang/lib/AST/RecordLayoutBuilder.cpp index 6a74e98..760b2fc 100644 --- a/clang/lib/AST/RecordLayoutBuilder.cpp +++ b/clang/lib/AST/RecordLayoutBuilder.cpp @@ -1953,7 +1953,7 @@ void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D, // silently there. For other targets that have ms_struct enabled // (most probably via a pragma or attribute), trigger a diagnostic // that defaults to an error. - if (!Context.getTargetInfo().getTriple().isWindowsGNUEnvironment()) + if (!Context.getTargetInfo().getTriple().isOSCygMing()) Diag(D->getLocation(), diag::warn_npot_ms_struct); } if (TypeSize > FieldAlign && diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp index fc4ec78..7481e1e 100644 --- a/clang/lib/Basic/FileManager.cpp +++ b/clang/lib/Basic/FileManager.cpp @@ -368,11 +368,6 @@ void FileManager::trackVFSUsage(bool Active) { }); } -const FileEntry *FileManager::getVirtualFile(StringRef Filename, off_t Size, - time_t ModificationTime) { - return &getVirtualFileRef(Filename, Size, ModificationTime).getFileEntry(); -} - FileEntryRef FileManager::getVirtualFileRef(StringRef Filename, off_t Size, time_t ModificationTime) { ++NumFileLookups; diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index cebcfa3..52cbdbc 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -266,8 +266,11 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP); - for (auto F : {"image-insts", "gws", "vmem-to-lds-load-insts"}) - ReadOnlyFeatures.insert(F); + + for (auto F : {"image-insts", "gws", "vmem-to-lds-load-insts"}) { + if (GPUKind != llvm::AMDGPU::GK_NONE) + ReadOnlyFeatures.insert(F); + } HalfArgsAndReturns = true; } diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp index 29de34bb..6bec2fa 100644 --- a/clang/lib/Basic/Targets/ARM.cpp +++ b/clang/lib/Basic/Targets/ARM.cpp @@ -618,21 +618,21 @@ bool ARMTargetInfo::handleTargetFeatures(std::vector<std::string> &Features, LDREX = 0; else if (ArchKind == llvm::ARM::ArchKind::ARMV6K || ArchKind == llvm::ARM::ArchKind::ARMV6KZ) - LDREX = LDREX_D | LDREX_W | LDREX_H | LDREX_B; + LDREX = ARM_LDREX_D | ARM_LDREX_W | ARM_LDREX_H | ARM_LDREX_B; else - LDREX = LDREX_W; + LDREX = ARM_LDREX_W; break; case 7: case 8: if (ArchProfile == llvm::ARM::ProfileKind::M) - LDREX = LDREX_W | LDREX_H | LDREX_B; + LDREX = ARM_LDREX_W | ARM_LDREX_H | ARM_LDREX_B; else - LDREX = LDREX_D | LDREX_W | LDREX_H | LDREX_B; + LDREX = ARM_LDREX_D | ARM_LDREX_W | ARM_LDREX_H | ARM_LDREX_B; break; case 9: assert(ArchProfile != llvm::ARM::ProfileKind::M && "No Armv9-M architectures defined"); - LDREX = LDREX_D | LDREX_W | LDREX_H | LDREX_B; + LDREX = ARM_LDREX_D | ARM_LDREX_W | ARM_LDREX_H | ARM_LDREX_B; } if (!(FPU & NeonFPU) && FPMath == FP_Neon) { diff --git a/clang/lib/Basic/Targets/ARM.h b/clang/lib/Basic/Targets/ARM.h index 1719217..43c4718 100644 --- a/clang/lib/Basic/Targets/ARM.h +++ b/clang/lib/Basic/Targets/ARM.h @@ -98,13 +98,6 @@ class LLVM_LIBRARY_VISIBILITY ARMTargetInfo : public TargetInfo { LLVM_PREFERRED_TYPE(bool) unsigned HasBTI : 1; - enum { - LDREX_B = (1 << 0), /// byte (8-bit) - LDREX_H = (1 << 1), /// half (16-bit) - LDREX_W = (1 << 2), /// word (32-bit) - LDREX_D = (1 << 3), /// double (64-bit) - }; - uint32_t LDREX; // ACLE 6.5.1 Hardware floating point @@ -225,6 +218,8 @@ public: bool hasBitIntType() const override { return true; } + unsigned getARMLDREXMask() const override { return LDREX; } + const char *getBFloat16Mangling() const override { return "u6__bf16"; }; std::pair<unsigned, unsigned> hardwareInterferenceSizes() const override { diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index ef136f8..9049a01 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -190,6 +190,11 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, assert(!cir::MissingFeatures::builtinCheckKind()); return emitBuiltinBitOp<cir::BitClzOp>(*this, e, /*poisonZero=*/true); + case Builtin::BI__builtin_ffs: + case Builtin::BI__builtin_ffsl: + case Builtin::BI__builtin_ffsll: + return emitBuiltinBitOp<cir::BitFfsOp>(*this, e); + case Builtin::BI__builtin_parity: case Builtin::BI__builtin_parityl: case Builtin::BI__builtin_parityll: diff --git a/clang/lib/CIR/CodeGen/CIRGenCall.cpp b/clang/lib/CIR/CodeGen/CIRGenCall.cpp index 938d143..fc208ff 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCall.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCall.cpp @@ -582,6 +582,14 @@ RValue CIRGenFunction::emitCall(const CIRGenFunctionInfo &funcInfo, cir::FuncOp directFuncOp; if (auto fnOp = dyn_cast<cir::FuncOp>(calleePtr)) { directFuncOp = fnOp; + } else if (auto getGlobalOp = mlir::dyn_cast<cir::GetGlobalOp>(calleePtr)) { + // FIXME(cir): This peephole optimization avoids indirect calls for + // builtins. This should be fixed in the builtin declaration instead by + // not emitting an unecessary get_global in the first place. + // However, this is also used for no-prototype functions. + mlir::Operation *globalOp = cgm.getGlobalValue(getGlobalOp.getName()); + assert(globalOp && "undefined global function"); + directFuncOp = mlir::cast<cir::FuncOp>(globalOp); } else { [[maybe_unused]] mlir::ValueTypeRange<mlir::ResultRange> resultTypes = calleePtr->getResultTypes(); diff --git a/clang/lib/CIR/CodeGen/CIRGenCall.h b/clang/lib/CIR/CodeGen/CIRGenCall.h index bd11329..a78956b 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCall.h +++ b/clang/lib/CIR/CodeGen/CIRGenCall.h @@ -116,6 +116,11 @@ public: assert(isOrdinary()); return reinterpret_cast<mlir::Operation *>(kindOrFunctionPtr); } + + void setFunctionPointer(mlir::Operation *functionPtr) { + assert(isOrdinary()); + kindOrFunctionPtr = SpecialKind(reinterpret_cast<uintptr_t>(functionPtr)); + } }; /// Type for representing both the decl and type of parameters to a function. diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp index a28ac3c..6527fb5 100644 --- a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp @@ -649,6 +649,38 @@ void CIRGenFunction::emitNullabilityCheck(LValue lhs, mlir::Value rhs, assert(!cir::MissingFeatures::sanitizers()); } +/// Destroys all the elements of the given array, beginning from last to first. +/// The array cannot be zero-length. +/// +/// \param begin - a type* denoting the first element of the array +/// \param end - a type* denoting one past the end of the array +/// \param elementType - the element type of the array +/// \param destroyer - the function to call to destroy elements +void CIRGenFunction::emitArrayDestroy(mlir::Value begin, mlir::Value end, + QualType elementType, + CharUnits elementAlign, + Destroyer *destroyer) { + assert(!elementType->isArrayType()); + + // Differently from LLVM traditional codegen, use a higher level + // representation instead of lowering directly to a loop. + mlir::Type cirElementType = convertTypeForMem(elementType); + cir::PointerType ptrToElmType = builder.getPointerTo(cirElementType); + + // Emit the dtor call that will execute for every array element. + cir::ArrayDtor::create( + builder, *currSrcLoc, begin, [&](mlir::OpBuilder &b, mlir::Location loc) { + auto arg = b.getInsertionBlock()->addArgument(ptrToElmType, loc); + Address curAddr = Address(arg, cirElementType, elementAlign); + assert(!cir::MissingFeatures::dtorCleanups()); + + // Perform the actual destruction there. + destroyer(*this, curAddr, elementType); + + cir::YieldOp::create(builder, loc); + }); +} + /// Immediately perform the destruction of the given object. /// /// \param addr - the address of the object; a type* @@ -658,10 +690,34 @@ void CIRGenFunction::emitNullabilityCheck(LValue lhs, mlir::Value rhs, /// elements void CIRGenFunction::emitDestroy(Address addr, QualType type, Destroyer *destroyer) { - if (getContext().getAsArrayType(type)) - cgm.errorNYI("emitDestroy: array type"); + const ArrayType *arrayType = getContext().getAsArrayType(type); + if (!arrayType) + return destroyer(*this, addr, type); + + mlir::Value length = emitArrayLength(arrayType, type, addr); + + CharUnits elementAlign = addr.getAlignment().alignmentOfArrayElement( + getContext().getTypeSizeInChars(type)); + + auto constantCount = length.getDefiningOp<cir::ConstantOp>(); + if (!constantCount) { + assert(!cir::MissingFeatures::vlas()); + cgm.errorNYI("emitDestroy: variable length array"); + return; + } + + auto constIntAttr = mlir::dyn_cast<cir::IntAttr>(constantCount.getValue()); + // If it's constant zero, we can just skip the entire thing. + if (constIntAttr && constIntAttr.getUInt() == 0) + return; + + mlir::Value begin = addr.getPointer(); + mlir::Value end; // This will be used for future non-constant counts. + emitArrayDestroy(begin, end, type, elementAlign, destroyer); - return destroyer(*this, addr, type); + // If the array destroy didn't use the length op, we can erase it. + if (constantCount.use_empty()) + constantCount.erase(); } CIRGenFunction::Destroyer * diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp index 64dc1ce..c18498f 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -1280,7 +1280,7 @@ RValue CIRGenFunction::getUndefRValue(QualType ty) { } RValue CIRGenFunction::emitCall(clang::QualType calleeTy, - const CIRGenCallee &callee, + const CIRGenCallee &origCallee, const clang::CallExpr *e, ReturnValueSlot returnValue) { // Get the actual function type. The callee type will always be a pointer to @@ -1291,6 +1291,8 @@ RValue CIRGenFunction::emitCall(clang::QualType calleeTy, calleeTy = getContext().getCanonicalType(calleeTy); auto pointeeTy = cast<PointerType>(calleeTy)->getPointeeType(); + CIRGenCallee callee = origCallee; + if (getLangOpts().CPlusPlus) assert(!cir::MissingFeatures::sanitizers()); @@ -1307,7 +1309,44 @@ RValue CIRGenFunction::emitCall(clang::QualType calleeTy, const CIRGenFunctionInfo &funcInfo = cgm.getTypes().arrangeFreeFunctionCall(args, fnType); - assert(!cir::MissingFeatures::opCallNoPrototypeFunc()); + // C99 6.5.2.2p6: + // If the expression that denotes the called function has a type that does + // not include a prototype, [the default argument promotions are performed]. + // If the number of arguments does not equal the number of parameters, the + // behavior is undefined. If the function is defined with a type that + // includes a prototype, and either the prototype ends with an ellipsis (, + // ...) or the types of the arguments after promotion are not compatible + // with the types of the parameters, the behavior is undefined. If the + // function is defined with a type that does not include a prototype, and + // the types of the arguments after promotion are not compatible with those + // of the parameters after promotion, the behavior is undefined [except in + // some trivial cases]. + // That is, in the general case, we should assume that a call through an + // unprototyped function type works like a *non-variadic* call. The way we + // make this work is to cast to the exxact type fo the promoted arguments. + if (isa<FunctionNoProtoType>(fnType)) { + assert(!cir::MissingFeatures::opCallChain()); + assert(!cir::MissingFeatures::addressSpace()); + cir::FuncType calleeTy = getTypes().getFunctionType(funcInfo); + // get non-variadic function type + calleeTy = cir::FuncType::get(calleeTy.getInputs(), + calleeTy.getReturnType(), false); + auto calleePtrTy = cir::PointerType::get(calleeTy); + + mlir::Operation *fn = callee.getFunctionPointer(); + mlir::Value addr; + if (auto funcOp = mlir::dyn_cast<cir::FuncOp>(fn)) { + addr = builder.create<cir::GetGlobalOp>( + getLoc(e->getSourceRange()), + cir::PointerType::get(funcOp.getFunctionType()), funcOp.getSymName()); + } else { + addr = fn->getResult(0); + } + + fn = builder.createBitcast(addr, calleePtrTy).getDefiningOp(); + callee.setFunctionPointer(fn); + } + assert(!cir::MissingFeatures::opCallFnInfoOpts()); assert(!cir::MissingFeatures::hip()); assert(!cir::MissingFeatures::opCallMustTail()); diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index 77539d7..603f750 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -848,6 +848,10 @@ public: /// even if no aggregate location is provided. RValue emitAnyExprToTemp(const clang::Expr *e); + void emitArrayDestroy(mlir::Value begin, mlir::Value end, + QualType elementType, CharUnits elementAlign, + Destroyer *destroyer); + mlir::Value emitArrayLength(const clang::ArrayType *arrayType, QualType &baseType, Address &addr); LValue emitArraySubscriptExpr(const clang::ArraySubscriptExpr *e); diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 0724cb1..750fe97 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -1103,6 +1103,60 @@ cir::GlobalLinkageKind CIRGenModule::getCIRLinkageForDeclarator( return cir::GlobalLinkageKind::ExternalLinkage; } +/// This function is called when we implement a function with no prototype, e.g. +/// "int foo() {}". If there are existing call uses of the old function in the +/// module, this adjusts them to call the new function directly. +/// +/// This is not just a cleanup: the always_inline pass requires direct calls to +/// functions to be able to inline them. If there is a bitcast in the way, it +/// won't inline them. Instcombine normally deletes these calls, but it isn't +/// run at -O0. +void CIRGenModule::replaceUsesOfNonProtoTypeWithRealFunction( + mlir::Operation *old, cir::FuncOp newFn) { + // If we're redefining a global as a function, don't transform it. + auto oldFn = mlir::dyn_cast<cir::FuncOp>(old); + if (!oldFn) + return; + + // TODO(cir): this RAUW ignores the features below. + assert(!cir::MissingFeatures::opFuncExceptions()); + assert(!cir::MissingFeatures::opFuncParameterAttributes()); + assert(!cir::MissingFeatures::opFuncOperandBundles()); + if (oldFn->getAttrs().size() <= 1) + errorNYI(old->getLoc(), + "replaceUsesOfNonProtoTypeWithRealFunction: Attribute forwarding"); + + // Mark new function as originated from a no-proto declaration. + newFn.setNoProto(oldFn.getNoProto()); + + // Iterate through all calls of the no-proto function. + std::optional<mlir::SymbolTable::UseRange> symUses = + oldFn.getSymbolUses(oldFn->getParentOp()); + for (const mlir::SymbolTable::SymbolUse &use : symUses.value()) { + mlir::OpBuilder::InsertionGuard guard(builder); + + if (auto noProtoCallOp = mlir::dyn_cast<cir::CallOp>(use.getUser())) { + builder.setInsertionPoint(noProtoCallOp); + + // Patch call type with the real function type. + cir::CallOp realCallOp = builder.createCallOp( + noProtoCallOp.getLoc(), newFn, noProtoCallOp.getOperands()); + + // Replace old no proto call with fixed call. + noProtoCallOp.replaceAllUsesWith(realCallOp); + noProtoCallOp.erase(); + } else if (auto getGlobalOp = + mlir::dyn_cast<cir::GetGlobalOp>(use.getUser())) { + // Replace type + getGlobalOp.getAddr().setType( + cir::PointerType::get(newFn.getFunctionType())); + } else { + errorNYI(use.getUser()->getLoc(), + "replaceUsesOfNonProtoTypeWithRealFunction: unexpected use"); + } + } +} + cir::GlobalLinkageKind CIRGenModule::getCIRLinkageVarDefinition(const VarDecl *vd, bool isConstant) { assert(!isConstant && "constant variables NYI"); @@ -1539,10 +1593,10 @@ static bool shouldAssumeDSOLocal(const CIRGenModule &cgm, const llvm::Triple &tt = cgm.getTriple(); const CodeGenOptions &cgOpts = cgm.getCodeGenOpts(); - if (tt.isWindowsGNUEnvironment()) { - // In MinGW, variables without DLLImport can still be automatically - // imported from a DLL by the linker; don't mark variables that - // potentially could come from another DLL as DSO local. + if (tt.isOSCygMing()) { + // In MinGW and Cygwin, variables without DLLImport can still be + // automatically imported from a DLL by the linker; don't mark variables + // that potentially could come from another DLL as DSO local. // With EmulatedTLS, TLS variables can be autoimported from other DLLs // (and this actually happens in the public interface of libstdc++), so @@ -1701,8 +1755,7 @@ cir::FuncOp CIRGenModule::getOrCreateCIRFunction( // Lookup the entry, lazily creating it if necessary. mlir::Operation *entry = getGlobalValue(mangledName); if (entry) { - if (!isa<cir::FuncOp>(entry)) - errorNYI(d->getSourceRange(), "getOrCreateCIRFunction: non-FuncOp"); + assert(mlir::isa<cir::FuncOp>(entry)); assert(!cir::MissingFeatures::weakRefReference()); @@ -1738,6 +1791,30 @@ cir::FuncOp CIRGenModule::getOrCreateCIRFunction( invalidLoc ? theModule->getLoc() : getLoc(funcDecl->getSourceRange()), mangledName, mlir::cast<cir::FuncType>(funcType), funcDecl); + // If we already created a function with the same mangled name (but different + // type) before, take its name and add it to the list of functions to be + // replaced with F at the end of CodeGen. + // + // This happens if there is a prototype for a function (e.g. "int f()") and + // then a definition of a different type (e.g. "int f(int x)"). + if (entry) { + + // Fetch a generic symbol-defining operation and its uses. + auto symbolOp = mlir::cast<mlir::SymbolOpInterface>(entry); + + // This might be an implementation of a function without a prototype, in + // which case, try to do special replacement of calls which match the new + // prototype. The really key thing here is that we also potentially drop + // arguments from the call site so as to make a direct call, which makes the + // inliner happier and suppresses a number of optimizer warnings (!) about + // dropping arguments. + if (symbolOp.getSymbolUses(symbolOp->getParentOp())) + replaceUsesOfNonProtoTypeWithRealFunction(entry, funcOp); + + // Obliterate no-proto declaration. + entry->erase(); + } + if (d) setFunctionAttributes(gd, funcOp, /*isIncompleteFunction=*/false, isThunk); @@ -1814,7 +1891,9 @@ CIRGenModule::createCIRFunction(mlir::Location loc, StringRef name, func = builder.create<cir::FuncOp>(loc, name, funcType); assert(!cir::MissingFeatures::opFuncAstDeclAttr()); - assert(!cir::MissingFeatures::opFuncNoProto()); + + if (funcDecl && !funcDecl->hasPrototype()) + func.setNoProto(true); assert(func.isDeclaration() && "expected empty body"); diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h index 22519ff..5d07d38 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.h +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -313,6 +313,9 @@ public: static void setInitializer(cir::GlobalOp &op, mlir::Attribute value); + void replaceUsesOfNonProtoTypeWithRealFunction(mlir::Operation *old, + cir::FuncOp newFn); + cir::FuncOp getOrCreateCIRFunction(llvm::StringRef mangledName, mlir::Type funcType, clang::GlobalDecl gd, bool forVTable, diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp index 05e8848..e4ec380 100644 --- a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp @@ -438,9 +438,7 @@ CIRRecordLowering::accumulateBitFields(RecordDecl::field_iterator field, } else if (cirGenTypes.getCGModule() .getCodeGenOpts() .FineGrainedBitfieldAccesses) { - assert(!cir::MissingFeatures::nonFineGrainedBitfields()); - cirGenTypes.getCGModule().errorNYI(field->getSourceRange(), - "NYI FineGrainedBitfield"); + installBest = true; } else { // Otherwise, we're not installing. Update the bit size // of the current span to go all the way to limitOffset, which is diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index 2213c75..1c3a310 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -1470,10 +1470,14 @@ ParseResult cir::FuncOp::parse(OpAsmParser &parser, OperationState &state) { llvm::SMLoc loc = parser.getCurrentLocation(); mlir::Builder &builder = parser.getBuilder(); + mlir::StringAttr noProtoNameAttr = getNoProtoAttrName(state.name); mlir::StringAttr visNameAttr = getSymVisibilityAttrName(state.name); mlir::StringAttr visibilityNameAttr = getGlobalVisibilityAttrName(state.name); mlir::StringAttr dsoLocalNameAttr = getDsoLocalAttrName(state.name); + if (parser.parseOptionalKeyword(noProtoNameAttr).succeeded()) + state.addAttribute(noProtoNameAttr, parser.getBuilder().getUnitAttr()); + // Default to external linkage if no keyword is provided. state.addAttribute(getLinkageAttrNameString(), GlobalLinkageKindAttr::get( @@ -1578,6 +1582,9 @@ mlir::Region *cir::FuncOp::getCallableRegion() { } void cir::FuncOp::print(OpAsmPrinter &p) { + if (getNoProto()) + p << " no_proto"; + if (getComdat()) p << " comdat"; @@ -2295,6 +2302,15 @@ OpFoldResult BitCtzOp::fold(FoldAdaptor adaptor) { getPoisonZero()); } +OpFoldResult BitFfsOp::fold(FoldAdaptor adaptor) { + return foldUnaryBitOp(adaptor.getInput(), [](const llvm::APInt &inputValue) { + unsigned trailingZeros = inputValue.countTrailingZeros(); + unsigned result = + trailingZeros == inputValue.getBitWidth() ? 0 : trailingZeros + 1; + return llvm::APInt(inputValue.getBitWidth(), result); + }); +} + OpFoldResult BitParityOp::fold(FoldAdaptor adaptor) { return foldUnaryBitOp(adaptor.getInput(), [](const llvm::APInt &inputValue) { return llvm::APInt(inputValue.getBitWidth(), inputValue.popcount() % 2); diff --git a/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp b/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp index 2143f16..2eaa60c 100644 --- a/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp +++ b/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp @@ -143,7 +143,7 @@ void CIRCanonicalizePass::runOnOperation() { if (isa<BrOp, BrCondOp, CastOp, ScopeOp, SwitchOp, SelectOp, UnaryOp, ComplexCreateOp, ComplexImagOp, ComplexRealOp, VecCmpOp, VecCreateOp, VecExtractOp, VecShuffleOp, VecShuffleDynamicOp, - VecTernaryOp, BitClrsbOp, BitClzOp, BitCtzOp, BitParityOp, + VecTernaryOp, BitClrsbOp, BitClzOp, BitCtzOp, BitFfsOp, BitParityOp, BitPopcountOp, BitReverseOp, ByteSwapOp, RotateOp>(op)) ops.push_back(op); }); diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp index cef83ea..ce3b30d 100644 --- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp +++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp @@ -29,7 +29,8 @@ struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> { void runOnOp(mlir::Operation *op); void lowerCastOp(cir::CastOp op); void lowerUnaryOp(cir::UnaryOp op); - void lowerArrayCtor(ArrayCtor op); + void lowerArrayDtor(cir::ArrayDtor op); + void lowerArrayCtor(cir::ArrayCtor op); /// /// AST related @@ -172,28 +173,30 @@ void LoweringPreparePass::lowerUnaryOp(cir::UnaryOp op) { static void lowerArrayDtorCtorIntoLoop(cir::CIRBaseBuilderTy &builder, clang::ASTContext *astCtx, mlir::Operation *op, mlir::Type eltTy, - mlir::Value arrayAddr, - uint64_t arrayLen) { + mlir::Value arrayAddr, uint64_t arrayLen, + bool isCtor) { // Generate loop to call into ctor/dtor for every element. mlir::Location loc = op->getLoc(); - // TODO: instead of fixed integer size, create alias for PtrDiffTy and unify - // with CIRGen stuff. + // TODO: instead of getting the size from the AST context, create alias for + // PtrDiffTy and unify with CIRGen stuff. const unsigned sizeTypeSize = astCtx->getTypeSize(astCtx->getSignedSizeType()); - auto ptrDiffTy = - cir::IntType::get(builder.getContext(), sizeTypeSize, /*isSigned=*/false); - mlir::Value numArrayElementsConst = builder.getUnsignedInt(loc, arrayLen, 64); + uint64_t endOffset = isCtor ? arrayLen : arrayLen - 1; + mlir::Value endOffsetVal = + builder.getUnsignedInt(loc, endOffset, sizeTypeSize); - auto begin = builder.create<cir::CastOp>( - loc, eltTy, cir::CastKind::array_to_ptrdecay, arrayAddr); - mlir::Value end = builder.create<cir::PtrStrideOp>(loc, eltTy, begin, - numArrayElementsConst); + auto begin = cir::CastOp::create(builder, loc, eltTy, + cir::CastKind::array_to_ptrdecay, arrayAddr); + mlir::Value end = + cir::PtrStrideOp::create(builder, loc, eltTy, begin, endOffsetVal); + mlir::Value start = isCtor ? begin : end; + mlir::Value stop = isCtor ? end : begin; mlir::Value tmpAddr = builder.createAlloca( loc, /*addr type*/ builder.getPointerTo(eltTy), /*var type*/ eltTy, "__array_idx", builder.getAlignmentAttr(1)); - builder.createStore(loc, begin, tmpAddr); + builder.createStore(loc, start, tmpAddr); cir::DoWhileOp loop = builder.createDoWhile( loc, @@ -202,7 +205,7 @@ static void lowerArrayDtorCtorIntoLoop(cir::CIRBaseBuilderTy &builder, auto currentElement = b.create<cir::LoadOp>(loc, eltTy, tmpAddr); mlir::Type boolTy = cir::BoolType::get(b.getContext()); auto cmp = builder.create<cir::CmpOp>(loc, boolTy, cir::CmpOpKind::ne, - currentElement, end); + currentElement, stop); builder.createCondition(cmp); }, /*bodyBuilder=*/ @@ -213,15 +216,19 @@ static void lowerArrayDtorCtorIntoLoop(cir::CIRBaseBuilderTy &builder, op->walk([&](cir::CallOp c) { ctorCall = c; }); assert(ctorCall && "expected ctor call"); - auto one = builder.create<cir::ConstantOp>( - loc, ptrDiffTy, cir::IntAttr::get(ptrDiffTy, 1)); + // Array elements get constructed in order but destructed in reverse. + mlir::Value stride; + if (isCtor) + stride = builder.getUnsignedInt(loc, 1, sizeTypeSize); + else + stride = builder.getSignedInt(loc, -1, sizeTypeSize); - ctorCall->moveAfter(one); + ctorCall->moveBefore(stride.getDefiningOp()); ctorCall->setOperand(0, currentElement); + auto nextElement = cir::PtrStrideOp::create(builder, loc, eltTy, + currentElement, stride); - // Advance pointer and store them to temporary variable - auto nextElement = - builder.create<cir::PtrStrideOp>(loc, eltTy, currentElement, one); + // Store the element pointer to the temporary variable builder.createStore(loc, nextElement, tmpAddr); builder.createYield(loc); }); @@ -230,6 +237,18 @@ static void lowerArrayDtorCtorIntoLoop(cir::CIRBaseBuilderTy &builder, op->erase(); } +void LoweringPreparePass::lowerArrayDtor(cir::ArrayDtor op) { + CIRBaseBuilderTy builder(getContext()); + builder.setInsertionPointAfter(op.getOperation()); + + mlir::Type eltTy = op->getRegion(0).getArgument(0).getType(); + assert(!cir::MissingFeatures::vlas()); + auto arrayLen = + mlir::cast<cir::ArrayType>(op.getAddr().getType().getPointee()).getSize(); + lowerArrayDtorCtorIntoLoop(builder, astCtx, op, eltTy, op.getAddr(), arrayLen, + false); +} + void LoweringPreparePass::lowerArrayCtor(cir::ArrayCtor op) { cir::CIRBaseBuilderTy builder(getContext()); builder.setInsertionPointAfter(op.getOperation()); @@ -238,13 +257,15 @@ void LoweringPreparePass::lowerArrayCtor(cir::ArrayCtor op) { assert(!cir::MissingFeatures::vlas()); auto arrayLen = mlir::cast<cir::ArrayType>(op.getAddr().getType().getPointee()).getSize(); - lowerArrayDtorCtorIntoLoop(builder, astCtx, op, eltTy, op.getAddr(), - arrayLen); + lowerArrayDtorCtorIntoLoop(builder, astCtx, op, eltTy, op.getAddr(), arrayLen, + true); } void LoweringPreparePass::runOnOp(mlir::Operation *op) { if (auto arrayCtor = dyn_cast<ArrayCtor>(op)) lowerArrayCtor(arrayCtor); + else if (auto arrayDtor = dyn_cast<cir::ArrayDtor>(op)) + lowerArrayDtor(arrayDtor); else if (auto cast = mlir::dyn_cast<cir::CastOp>(op)) lowerCastOp(cast); else if (auto unary = mlir::dyn_cast<cir::UnaryOp>(op)) @@ -257,7 +278,8 @@ void LoweringPreparePass::runOnOperation() { llvm::SmallVector<mlir::Operation *> opsToTransform; op->walk([&](mlir::Operation *op) { - if (mlir::isa<cir::ArrayCtor, cir::CastOp, cir::UnaryOp>(op)) + if (mlir::isa<cir::ArrayCtor, cir::ArrayDtor, cir::CastOp, cir::UnaryOp>( + op)) opsToTransform.push_back(op); }); diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index c27b889..957a51a 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -521,6 +521,32 @@ mlir::LogicalResult CIRToLLVMBitCtzOpLowering::matchAndRewrite( return mlir::LogicalResult::success(); } +mlir::LogicalResult CIRToLLVMBitFfsOpLowering::matchAndRewrite( + cir::BitFfsOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + auto resTy = getTypeConverter()->convertType(op.getType()); + auto ctz = rewriter.create<mlir::LLVM::CountTrailingZerosOp>( + op.getLoc(), resTy, adaptor.getInput(), /*is_zero_poison=*/true); + + auto one = rewriter.create<mlir::LLVM::ConstantOp>(op.getLoc(), resTy, 1); + auto ctzAddOne = rewriter.create<mlir::LLVM::AddOp>(op.getLoc(), ctz, one); + + auto zeroInputTy = rewriter.create<mlir::LLVM::ConstantOp>( + op.getLoc(), adaptor.getInput().getType(), 0); + auto isZero = rewriter.create<mlir::LLVM::ICmpOp>( + op.getLoc(), + mlir::LLVM::ICmpPredicateAttr::get(rewriter.getContext(), + mlir::LLVM::ICmpPredicate::eq), + adaptor.getInput(), zeroInputTy); + + auto zero = rewriter.create<mlir::LLVM::ConstantOp>(op.getLoc(), resTy, 0); + auto res = rewriter.create<mlir::LLVM::SelectOp>(op.getLoc(), isZero, zero, + ctzAddOne); + rewriter.replaceOp(op, res); + + return mlir::LogicalResult::success(); +} + mlir::LogicalResult CIRToLLVMBitParityOpLowering::matchAndRewrite( cir::BitParityOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { @@ -919,13 +945,45 @@ rewriteCallOrInvoke(mlir::Operation *op, mlir::ValueRange callOperands, memoryEffects, noUnwind, willReturn); mlir::LLVM::LLVMFunctionType llvmFnTy; + + // Temporary to handle the case where we need to prepend an operand if the + // callee is an alias. + SmallVector<mlir::Value> adjustedCallOperands; + if (calleeAttr) { // direct call - mlir::FunctionOpInterface fn = - mlir::SymbolTable::lookupNearestSymbolFrom<mlir::FunctionOpInterface>( - op, calleeAttr); - assert(fn && "Did not find function for call"); - llvmFnTy = cast<mlir::LLVM::LLVMFunctionType>( - converter->convertType(fn.getFunctionType())); + mlir::Operation *callee = + mlir::SymbolTable::lookupNearestSymbolFrom(op, calleeAttr); + if (auto fn = mlir::dyn_cast<mlir::FunctionOpInterface>(callee)) { + llvmFnTy = converter->convertType<mlir::LLVM::LLVMFunctionType>( + fn.getFunctionType()); + assert(llvmFnTy && "Failed to convert function type"); + } else if (auto alias = mlir::cast<mlir::LLVM::AliasOp>(callee)) { + // If the callee was an alias. In that case, + // we need to prepend the address of the alias to the operands. The + // way aliases work in the LLVM dialect is a little counter-intuitive. + // The AliasOp itself is a pseudo-function that returns the address of + // the global value being aliased, but when we generate the call we + // need to insert an operation that gets the address of the AliasOp. + // This all gets sorted out when the LLVM dialect is lowered to LLVM IR. + auto symAttr = mlir::cast<mlir::FlatSymbolRefAttr>(calleeAttr); + auto addrOfAlias = + mlir::LLVM::AddressOfOp::create( + rewriter, op->getLoc(), + mlir::LLVM::LLVMPointerType::get(rewriter.getContext()), symAttr) + .getResult(); + adjustedCallOperands.push_back(addrOfAlias); + + // Now add the regular operands and assign this to the range value. + llvm::append_range(adjustedCallOperands, callOperands); + callOperands = adjustedCallOperands; + + // Clear the callee attribute because we're calling an alias. + calleeAttr = {}; + llvmFnTy = mlir::cast<mlir::LLVM::LLVMFunctionType>(alias.getType()); + } else { + // Was this an ifunc? + return op->emitError("Unexpected callee type!"); + } } else { // indirect call assert(!op->getOperands().empty() && "operands list must no be empty for the indirect call"); @@ -1172,6 +1230,30 @@ void CIRToLLVMFuncOpLowering::lowerFuncAttributes( } } +mlir::LogicalResult CIRToLLVMFuncOpLowering::matchAndRewriteAlias( + cir::FuncOp op, llvm::StringRef aliasee, mlir::Type ty, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + SmallVector<mlir::NamedAttribute, 4> attributes; + lowerFuncAttributes(op, /*filterArgAndResAttrs=*/false, attributes); + + mlir::Location loc = op.getLoc(); + auto aliasOp = rewriter.replaceOpWithNewOp<mlir::LLVM::AliasOp>( + op, ty, convertLinkage(op.getLinkage()), op.getName(), op.getDsoLocal(), + /*threadLocal=*/false, attributes); + + // Create the alias body + mlir::OpBuilder builder(op.getContext()); + mlir::Block *block = builder.createBlock(&aliasOp.getInitializerRegion()); + builder.setInsertionPointToStart(block); + // The type of AddressOfOp is always a pointer. + assert(!cir::MissingFeatures::addressSpace()); + mlir::Type ptrTy = mlir::LLVM::LLVMPointerType::get(ty.getContext()); + auto addrOp = mlir::LLVM::AddressOfOp::create(builder, loc, ptrTy, aliasee); + mlir::LLVM::ReturnOp::create(builder, loc, addrOp); + + return mlir::success(); +} + mlir::LogicalResult CIRToLLVMFuncOpLowering::matchAndRewrite( cir::FuncOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { @@ -1196,6 +1278,11 @@ mlir::LogicalResult CIRToLLVMFuncOpLowering::matchAndRewrite( resultType ? resultType : mlir::LLVM::LLVMVoidType::get(getContext()), signatureConversion.getConvertedTypes(), /*isVarArg=*/fnType.isVarArg()); + + // If this is an alias, it needs to be lowered to llvm::AliasOp. + if (std::optional<llvm::StringRef> aliasee = op.getAliasee()) + return matchAndRewriteAlias(op, *aliasee, llvmFnTy, adaptor, rewriter); + // LLVMFuncOp expects a single FileLine Location instead of a fused // location. mlir::Location loc = op.getLoc(); @@ -2089,6 +2176,7 @@ void ConvertCIRToLLVMPass::runOnOperation() { CIRToLLVMBitClrsbOpLowering, CIRToLLVMBitClzOpLowering, CIRToLLVMBitCtzOpLowering, + CIRToLLVMBitFfsOpLowering, CIRToLLVMBitParityOpLowering, CIRToLLVMBitPopcountOpLowering, CIRToLLVMBitReverseOpLowering, diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h index 2911ced..f339d43 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h @@ -84,6 +84,16 @@ public: mlir::ConversionPatternRewriter &) const override; }; +class CIRToLLVMBitFfsOpLowering + : public mlir::OpConversionPattern<cir::BitFfsOp> { +public: + using mlir::OpConversionPattern<cir::BitFfsOp>::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(cir::BitFfsOp op, OpAdaptor, + mlir::ConversionPatternRewriter &) const override; +}; + class CIRToLLVMBitParityOpLowering : public mlir::OpConversionPattern<cir::BitParityOp> { public: @@ -257,6 +267,11 @@ class CIRToLLVMFuncOpLowering : public mlir::OpConversionPattern<cir::FuncOp> { cir::FuncOp func, bool filterArgAndResAttrs, mlir::SmallVectorImpl<mlir::NamedAttribute> &result) const; + mlir::LogicalResult + matchAndRewriteAlias(cir::FuncOp op, llvm::StringRef aliasee, mlir::Type ty, + OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const; + public: using mlir::OpConversionPattern<cir::FuncOp>::OpConversionPattern; diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index eb5b604..2c0767f 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -908,6 +908,8 @@ bool CodeGenAction::loadLinkModules(CompilerInstance &CI) { bool CodeGenAction::hasIRSupport() const { return true; } void CodeGenAction::EndSourceFileAction() { + ASTFrontendAction::EndSourceFileAction(); + // If the consumer creation failed, do nothing. if (!getCompilerInstance().hasASTConsumer()) return; @@ -932,7 +934,7 @@ CodeGenerator *CodeGenAction::getCodeGenerator() const { bool CodeGenAction::BeginSourceFileAction(CompilerInstance &CI) { if (CI.getFrontendOpts().GenReducedBMI) CI.getLangOpts().setCompilingModule(LangOptions::CMK_ModuleInterface); - return true; + return ASTFrontendAction::BeginSourceFileAction(CI); } static std::unique_ptr<raw_pwrite_stream> diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index 2e6b4b3..980f7eb 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -4922,19 +4922,6 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, if (Builtin->LLVMIntrinsic == 0) return nullptr; - if (BuiltinID == SME::BI__builtin_sme___arm_in_streaming_mode) { - // If we already know the streaming mode, don't bother with the intrinsic - // and emit a constant instead - const auto *FD = cast<FunctionDecl>(CurFuncDecl); - if (const auto *FPT = FD->getType()->getAs<FunctionProtoType>()) { - unsigned SMEAttrs = FPT->getAArch64SMEAttributes(); - if (!(SMEAttrs & FunctionType::SME_PStateSMCompatibleMask)) { - bool IsStreaming = SMEAttrs & FunctionType::SME_PStateSMEnabledMask; - return ConstantInt::getBool(Builder.getContext(), IsStreaming); - } - } - } - // Predicates must match the main datatype. for (Value *&Op : Ops) if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType())) diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 1d7dad0..25c6b5a 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -191,9 +191,10 @@ static void getAArch64MultilibFlags(const Driver &D, for (const auto &ArchInfo : AArch64::ArchInfos) if (FeatureSet.contains(ArchInfo->ArchFeature)) ArchName = ArchInfo->Name; - assert(!ArchName.empty() && "at least one architecture should be found"); - MArch.insert(MArch.begin(), ("-march=" + ArchName).str()); - Result.push_back(llvm::join(MArch, "+")); + if (!ArchName.empty()) { + MArch.insert(MArch.begin(), ("-march=" + ArchName).str()); + Result.push_back(llvm::join(MArch, "+")); + } const Arg *BranchProtectionArg = Args.getLastArgNoClaim(options::OPT_mbranch_protection_EQ); @@ -760,7 +761,7 @@ std::string ToolChain::buildCompilerRTBasename(const llvm::opt::ArgList &Args, break; case ToolChain::FT_Shared: if (TT.isOSWindows()) - Suffix = TT.isWindowsGNUEnvironment() ? ".dll.a" : ".lib"; + Suffix = TT.isOSCygMing() ? ".dll.a" : ".lib"; else if (TT.isOSAIX()) Suffix = ".a"; else diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp index 6bd710e..418f9fd 100644 --- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp +++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp @@ -467,3 +467,18 @@ void aarch64::setPAuthABIInTriple(const Driver &D, const ArgList &Args, break; } } + +/// Is the triple {aarch64.aarch64_be}-none-elf? +bool aarch64::isAArch64BareMetal(const llvm::Triple &Triple) { + if (Triple.getArch() != llvm::Triple::aarch64 && + Triple.getArch() != llvm::Triple::aarch64_be) + return false; + + if (Triple.getVendor() != llvm::Triple::UnknownVendor) + return false; + + if (Triple.getOS() != llvm::Triple::UnknownOS) + return false; + + return Triple.getEnvironmentName() == "elf"; +} diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.h b/clang/lib/Driver/ToolChains/Arch/AArch64.h index 2057272..2765ee8 100644 --- a/clang/lib/Driver/ToolChains/Arch/AArch64.h +++ b/clang/lib/Driver/ToolChains/Arch/AArch64.h @@ -30,6 +30,7 @@ std::string getAArch64TargetCPU(const llvm::opt::ArgList &Args, void setPAuthABIInTriple(const Driver &D, const llvm::opt::ArgList &Args, llvm::Triple &triple); +bool isAArch64BareMetal(const llvm::Triple &Triple); } // end namespace aarch64 } // end namespace target diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp index 497f333..207150e 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.cpp +++ b/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -12,6 +12,7 @@ #include "clang/Driver/CommonArgs.h" #include "clang/Driver/InputInfo.h" +#include "Arch/AArch64.h" #include "Arch/ARM.h" #include "Arch/RISCV.h" #include "clang/Driver/Compilation.h" @@ -31,21 +32,6 @@ using namespace clang::driver; using namespace clang::driver::tools; using namespace clang::driver::toolchains; -/// Is the triple {aarch64.aarch64_be}-none-elf? -static bool isAArch64BareMetal(const llvm::Triple &Triple) { - if (Triple.getArch() != llvm::Triple::aarch64 && - Triple.getArch() != llvm::Triple::aarch64_be) - return false; - - if (Triple.getVendor() != llvm::Triple::UnknownVendor) - return false; - - if (Triple.getOS() != llvm::Triple::UnknownOS) - return false; - - return Triple.getEnvironmentName() == "elf"; -} - static bool isRISCVBareMetal(const llvm::Triple &Triple) { if (!Triple.isRISCV()) return false; @@ -363,8 +349,9 @@ void BareMetal::findMultilibs(const Driver &D, const llvm::Triple &Triple, } bool BareMetal::handlesTarget(const llvm::Triple &Triple) { - return arm::isARMEABIBareMetal(Triple) || isAArch64BareMetal(Triple) || - isRISCVBareMetal(Triple) || isPPCBareMetal(Triple); + return arm::isARMEABIBareMetal(Triple) || + aarch64::isAArch64BareMetal(Triple) || isRISCVBareMetal(Triple) || + isPPCBareMetal(Triple); } Tool *BareMetal::buildLinker() const { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 9d882db..d7c8208 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5944,7 +5944,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-mms-bitfields"); } - if (Triple.isWindowsGNUEnvironment()) { + if (Triple.isOSCygMing()) { Args.addOptOutFlag(CmdArgs, options::OPT_fauto_import, options::OPT_fno_auto_import); } diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 826e2ea..3086c14 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -547,15 +547,22 @@ const char *tools::getLDMOption(const llvm::Triple &T, const ArgList &Args) { case llvm::Triple::aarch64: if (T.isOSManagarm()) return "aarch64managarm"; + else if (aarch64::isAArch64BareMetal(T)) + return "aarch64elf"; return "aarch64linux"; case llvm::Triple::aarch64_be: + if (aarch64::isAArch64BareMetal(T)) + return "aarch64elfb"; return "aarch64linuxb"; case llvm::Triple::arm: case llvm::Triple::thumb: case llvm::Triple::armeb: - case llvm::Triple::thumbeb: - return tools::arm::isARMBigEndian(T, Args) ? "armelfb_linux_eabi" - : "armelf_linux_eabi"; + case llvm::Triple::thumbeb: { + bool IsBigEndian = tools::arm::isARMBigEndian(T, Args); + if (arm::isARMEABIBareMetal(T)) + return IsBigEndian ? "armelfb" : "armelf"; + return IsBigEndian ? "armelfb_linux_eabi" : "armelf_linux_eabi"; + } case llvm::Triple::m68k: return "m68kelf"; case llvm::Triple::ppc: diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp index dcfbd53..685a9bb 100644 --- a/clang/lib/Frontend/FrontendActions.cpp +++ b/clang/lib/Frontend/FrontendActions.cpp @@ -181,7 +181,7 @@ bool GeneratePCHAction::shouldEraseOutputFiles() { bool GeneratePCHAction::BeginSourceFileAction(CompilerInstance &CI) { CI.getLangOpts().CompilingPCH = true; - return true; + return ASTFrontendAction::BeginSourceFileAction(CI); } std::vector<std::unique_ptr<ASTConsumer>> diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 382ccd6..008a35d 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -945,8 +945,8 @@ static void InitializePredefinedMacros(const TargetInfo &TI, if (LangOpts.GNUCVersion && LangOpts.CPlusPlus11) Builder.defineMacro("__GXX_EXPERIMENTAL_CXX0X__"); - if (TI.getTriple().isWindowsGNUEnvironment()) { - // Set ABI defining macros for libstdc++ for MinGW, where the + if (TI.getTriple().isOSCygMing()) { + // Set ABI defining macros for libstdc++ for MinGW and Cygwin, where the // default in libstdc++ differs from the defaults for this target. Builder.defineMacro("__GXX_TYPEINFO_EQUALITY_INLINE", "0"); } diff --git a/clang/lib/Frontend/Rewrite/FrontendActions.cpp b/clang/lib/Frontend/Rewrite/FrontendActions.cpp index 84e7a4f..6c9c9d5 100644 --- a/clang/lib/Frontend/Rewrite/FrontendActions.cpp +++ b/clang/lib/Frontend/Rewrite/FrontendActions.cpp @@ -103,12 +103,13 @@ bool FixItAction::BeginSourceFileAction(CompilerInstance &CI) { } Rewriter.reset(new FixItRewriter(CI.getDiagnostics(), CI.getSourceManager(), CI.getLangOpts(), FixItOpts.get())); - return true; + return ASTFrontendAction::BeginSourceFileAction(CI); } void FixItAction::EndSourceFileAction() { // Otherwise rewrite all files. Rewriter->WriteFixedFiles(); + ASTFrontendAction::EndSourceFileAction(); } bool FixItRecompile::BeginInvocation(CompilerInstance &CI) { @@ -298,7 +299,7 @@ bool RewriteIncludesAction::BeginSourceFileAction(CompilerInstance &CI) { std::make_unique<RewriteImportsListener>(CI, OutputStream)); } - return true; + return PreprocessorFrontendAction::BeginSourceFileAction(CI); } void RewriteIncludesAction::ExecuteAction() { diff --git a/clang/lib/Headers/avx10_2_512niintrin.h b/clang/lib/Headers/avx10_2_512niintrin.h index 7e614f7..9d96e36c7 100644 --- a/clang/lib/Headers/avx10_2_512niintrin.h +++ b/clang/lib/Headers/avx10_2_512niintrin.h @@ -197,7 +197,7 @@ _mm512_mask_dpwsud_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwsud_epi32( - __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectd_512( (__mmask16)__U, (__v16si)_mm512_dpwsud_epi32(__A, __B, __C), (__v16si)_mm512_setzero_si512()); @@ -218,7 +218,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwsuds_epi32( } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwsuds_epi32( - __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectd_512( (__mmask16)__U, (__v16si)_mm512_dpwsuds_epi32(__A, __B, __C), (__v16si)_mm512_setzero_si512()); @@ -239,7 +239,7 @@ _mm512_mask_dpwusd_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwusd_epi32( - __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectd_512( (__mmask16)__U, (__v16si)_mm512_dpwusd_epi32(__A, __B, __C), (__v16si)_mm512_setzero_si512()); @@ -260,7 +260,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwusds_epi32( } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwusds_epi32( - __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectd_512( (__mmask16)__U, (__v16si)_mm512_dpwusds_epi32(__A, __B, __C), (__v16si)_mm512_setzero_si512()); @@ -281,7 +281,7 @@ _mm512_mask_dpwuud_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwuud_epi32( - __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectd_512( (__mmask16)__U, (__v16si)_mm512_dpwuud_epi32(__A, __B, __C), (__v16si)_mm512_setzero_si512()); @@ -302,7 +302,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwuuds_epi32( } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwuuds_epi32( - __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectd_512( (__mmask16)__U, (__v16si)_mm512_dpwuuds_epi32(__A, __B, __C), (__v16si)_mm512_setzero_si512()); diff --git a/clang/lib/Headers/avx10_2niintrin.h b/clang/lib/Headers/avx10_2niintrin.h index 992be18..d5a66cf 100644 --- a/clang/lib/Headers/avx10_2niintrin.h +++ b/clang/lib/Headers/avx10_2niintrin.h @@ -253,7 +253,7 @@ _mm_mask_dpwsud_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_dpwsud_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { +_mm_maskz_dpwsud_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectd_128( (__mmask8)__U, (__v4si)_mm_dpwsud_epi32(__A, __B, __C), (__v4si)_mm_setzero_si128()); @@ -266,7 +266,7 @@ _mm256_mask_dpwsud_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_dpwsud_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { +_mm256_maskz_dpwsud_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectd_256( (__mmask8)__U, (__v8si)_mm256_dpwsud_epi32(__A, __B, __C), (__v8si)_mm256_setzero_si256()); @@ -279,7 +279,7 @@ _mm_mask_dpwsuds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_dpwsuds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { +_mm_maskz_dpwsuds_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectd_128( (__mmask8)__U, (__v4si)_mm_dpwsuds_epi32(__A, __B, __C), (__v4si)_mm_setzero_si128()); @@ -292,7 +292,7 @@ _mm256_mask_dpwsuds_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwsuds_epi32( - __m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { + __mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectd_256( (__mmask8)__U, (__v8si)_mm256_dpwsuds_epi32(__A, __B, __C), (__v8si)_mm256_setzero_si256()); @@ -305,7 +305,7 @@ _mm_mask_dpwusd_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_dpwusd_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { +_mm_maskz_dpwusd_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectd_128( (__mmask8)__U, (__v4si)_mm_dpwusd_epi32(__A, __B, __C), (__v4si)_mm_setzero_si128()); @@ -318,7 +318,7 @@ _mm256_mask_dpwusd_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_dpwusd_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { +_mm256_maskz_dpwusd_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectd_256( (__mmask8)__U, (__v8si)_mm256_dpwusd_epi32(__A, __B, __C), (__v8si)_mm256_setzero_si256()); @@ -331,7 +331,7 @@ _mm_mask_dpwusds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_dpwusds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { +_mm_maskz_dpwusds_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectd_128( (__mmask8)__U, (__v4si)_mm_dpwusds_epi32(__A, __B, __C), (__v4si)_mm_setzero_si128()); @@ -344,7 +344,7 @@ _mm256_mask_dpwusds_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwusds_epi32( - __m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { + __mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectd_256( (__mmask8)__U, (__v8si)_mm256_dpwusds_epi32(__A, __B, __C), (__v8si)_mm256_setzero_si256()); @@ -357,7 +357,7 @@ _mm_mask_dpwuud_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_dpwuud_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { +_mm_maskz_dpwuud_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectd_128( (__mmask8)__U, (__v4si)_mm_dpwuud_epi32(__A, __B, __C), (__v4si)_mm_setzero_si128()); @@ -370,7 +370,7 @@ _mm256_mask_dpwuud_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_dpwuud_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { +_mm256_maskz_dpwuud_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectd_256( (__mmask8)__U, (__v8si)_mm256_dpwuud_epi32(__A, __B, __C), (__v8si)_mm256_setzero_si256()); @@ -383,7 +383,7 @@ _mm_mask_dpwuuds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_dpwuuds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { +_mm_maskz_dpwuuds_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectd_128( (__mmask8)__U, (__v4si)_mm_dpwuuds_epi32(__A, __B, __C), (__v4si)_mm_setzero_si128()); @@ -396,7 +396,7 @@ _mm256_mask_dpwuuds_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwuuds_epi32( - __m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { + __mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectd_256( (__mmask8)__U, (__v8si)_mm256_dpwuuds_epi32(__A, __B, __C), (__v8si)_mm256_setzero_si256()); diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h index e1e0fde..f65b4b3 100644 --- a/clang/lib/Headers/opencl-c.h +++ b/clang/lib/Headers/opencl-c.h @@ -18410,6 +18410,22 @@ intel_sub_group_avc_mce_convert_to_sic_result( #pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : end #endif // cl_intel_device_side_avc_motion_estimation +#if defined(cl_intel_bfloat16_conversions) +ushort __ovld intel_convert_bfloat16_as_ushort(float source); +ushort2 __ovld intel_convert_bfloat162_as_ushort2(float2 source); +ushort3 __ovld intel_convert_bfloat163_as_ushort3(float3 source); +ushort4 __ovld intel_convert_bfloat164_as_ushort4(float4 source); +ushort8 __ovld intel_convert_bfloat168_as_ushort8(float8 source); +ushort16 __ovld intel_convert_bfloat1616_as_ushort16(float16 source); + +float __ovld intel_convert_as_bfloat16_float(ushort source); +float2 __ovld intel_convert_as_bfloat162_float2(ushort2 source); +float3 __ovld intel_convert_as_bfloat163_float3(ushort3 source); +float4 __ovld intel_convert_as_bfloat164_float4(ushort4 source); +float8 __ovld intel_convert_as_bfloat168_float8(ushort8 source); +float16 __ovld intel_convert_as_bfloat1616_float16(ushort16 source); +#endif // cl_intel_bfloat16_conversions + #ifdef cl_amd_media_ops uint __ovld amd_bitalign(uint, uint, uint); uint2 __ovld amd_bitalign(uint2, uint2, uint2); diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp index 8e27fab..e09c352 100644 --- a/clang/lib/Sema/SemaARM.cpp +++ b/clang/lib/Sema/SemaARM.cpp @@ -846,9 +846,9 @@ bool SemaARM::CheckARMCoprocessorImmediate(const TargetInfo &TI, return false; } -bool SemaARM::CheckARMBuiltinExclusiveCall(unsigned BuiltinID, - CallExpr *TheCall, - unsigned MaxWidth) { +bool SemaARM::CheckARMBuiltinExclusiveCall(const TargetInfo &TI, + unsigned BuiltinID, + CallExpr *TheCall) { assert((BuiltinID == ARM::BI__builtin_arm_ldrex || BuiltinID == ARM::BI__builtin_arm_ldaex || BuiltinID == ARM::BI__builtin_arm_strex || @@ -923,12 +923,56 @@ bool SemaARM::CheckARMBuiltinExclusiveCall(unsigned BuiltinID, return true; } - // But ARM doesn't have instructions to deal with 128-bit versions. - if (Context.getTypeSize(ValType) > MaxWidth) { - assert(MaxWidth == 64 && "Diagnostic unexpectedly inaccurate"); - Diag(DRE->getBeginLoc(), diag::err_atomic_exclusive_builtin_pointer_size) - << PointerArg->getType() << PointerArg->getSourceRange(); - return true; + // Check whether the size of the type can be handled atomically on this + // target. + if (!TI.getTriple().isAArch64()) { + unsigned Mask = TI.getARMLDREXMask(); + unsigned Bits = Context.getTypeSize(ValType); + bool Supported = + (llvm::isPowerOf2_64(Bits)) && Bits >= 8 && (Mask & (Bits / 8)); + + if (!Supported) { + // Emit a diagnostic saying that this size isn't available. If _no_ size + // of exclusive access is supported on this target, we emit a diagnostic + // with special wording for that case, but otherwise, we emit + // err_atomic_exclusive_builtin_pointer_size and loop over `Mask` to + // control what subset of sizes it lists as legal. + if (Mask) { + auto D = Diag(DRE->getBeginLoc(), + diag::err_atomic_exclusive_builtin_pointer_size) + << PointerArg->getType(); + bool Started = false; + for (unsigned Size = 1; Size <= 8; Size <<= 1) { + // For each of the sizes 1,2,4,8, pass two integers into the + // diagnostic. The first selects a separator from the previous + // number: 0 for no separator at all, 1 for a comma, 2 for " or " + // which appears before the final number in a list of more than one. + // The second integer just indicates whether we print this size in + // the message at all. + if (!(Mask & Size)) { + // This size isn't one of the supported ones, so emit no separator + // text and don't print the size itself. + D << 0 << 0; + } else { + // This size is supported, so print it, and an appropriate + // separator. + Mask &= ~Size; + if (!Started) + D << 0; // No separator if this is the first size we've printed + else if (Mask) + D << 1; // "," if there's still another size to come + else + D << 2; // " or " if the size we're about to print is the last + D << 1; // print the size itself + Started = true; + } + } + } else { + Diag(DRE->getBeginLoc(), + diag::err_atomic_exclusive_builtin_pointer_size_none) + << PointerArg->getSourceRange(); + } + } } switch (ValType.getObjCLifetime()) { @@ -972,7 +1016,7 @@ bool SemaARM::CheckARMBuiltinFunctionCall(const TargetInfo &TI, BuiltinID == ARM::BI__builtin_arm_ldaex || BuiltinID == ARM::BI__builtin_arm_strex || BuiltinID == ARM::BI__builtin_arm_stlex) { - return CheckARMBuiltinExclusiveCall(BuiltinID, TheCall, 64); + return CheckARMBuiltinExclusiveCall(TI, BuiltinID, TheCall); } if (BuiltinID == ARM::BI__builtin_arm_prefetch) { @@ -1053,7 +1097,7 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI, BuiltinID == AArch64::BI__builtin_arm_ldaex || BuiltinID == AArch64::BI__builtin_arm_strex || BuiltinID == AArch64::BI__builtin_arm_stlex) { - return CheckARMBuiltinExclusiveCall(BuiltinID, TheCall, 128); + return CheckARMBuiltinExclusiveCall(TI, BuiltinID, TheCall); } if (BuiltinID == AArch64::BI__builtin_arm_prefetch) { diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index d7420bd..c7e7507 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -12586,9 +12586,9 @@ static bool isDefaultStdCall(FunctionDecl *FD, Sema &S) { if (FD->getName() == "main" || FD->getName() == "wmain") return false; - // Default calling convention for MinGW is __cdecl + // Default calling convention for MinGW and Cygwin is __cdecl const llvm::Triple &T = S.Context.getTargetInfo().getTriple(); - if (T.isWindowsGNUEnvironment()) + if (T.isOSCygMing()) return false; // Default calling convention for WinMain, wWinMain and DllMain diff --git a/clang/lib/Sema/SemaStmtAttr.cpp b/clang/lib/Sema/SemaStmtAttr.cpp index 857d46a..77aa716 100644 --- a/clang/lib/Sema/SemaStmtAttr.cpp +++ b/clang/lib/Sema/SemaStmtAttr.cpp @@ -795,6 +795,10 @@ ExprResult Sema::BuildCXXAssumeExpr(Expr *Assumption, if (Res.isInvalid()) return ExprError(); + Res = ActOnFinishFullExpr(Res.get(), /*DiscardedValue=*/false); + if (Res.isInvalid()) + return ExprError(); + Assumption = Res.get(); if (Assumption->HasSideEffects(Context)) Diag(Assumption->getBeginLoc(), diag::warn_assume_side_effects) diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 20bac0e..d84d0ca1 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -2270,11 +2270,6 @@ TemplateInstantiator::TransformCXXAssumeAttr(const CXXAssumeAttr *AA) { if (!Res.isUsable()) return AA; - Res = getSema().ActOnFinishFullExpr(Res.get(), - /*DiscardedValue=*/false); - if (!Res.isUsable()) - return AA; - if (!(Res.get()->getDependence() & ExprDependence::TypeValueInstantiation)) { Res = getSema().BuildCXXAssumeExpr(Res.get(), AA->getAttrName(), AA->getRange()); |