diff options
Diffstat (limited to 'clang/lib')
51 files changed, 1844 insertions, 374 deletions
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 61dd330..0fd0e7e 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -12590,6 +12590,10 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context, Type = Context.AMDGPUBufferRsrcTy; break; } + case 't': { + Type = Context.AMDGPUTextureTy; + break; + } default: llvm_unreachable("Unexpected target builtin type"); } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 891344d..a2e97fc 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -1294,95 +1294,6 @@ static bool interp__builtin_assume_aligned(InterpState &S, CodePtr OpPC, return true; } -static bool interp__builtin_ia32_bextr(InterpState &S, CodePtr OpPC, - const InterpFrame *Frame, - const CallExpr *Call) { - if (Call->getNumArgs() != 2 || !Call->getArg(0)->getType()->isIntegerType() || - !Call->getArg(1)->getType()->isIntegerType()) - return false; - - APSInt Index = popToAPSInt(S, Call->getArg(1)); - APSInt Val = popToAPSInt(S, Call->getArg(0)); - - unsigned BitWidth = Val.getBitWidth(); - uint64_t Shift = Index.extractBitsAsZExtValue(8, 0); - uint64_t Length = Index.extractBitsAsZExtValue(8, 8); - Length = Length > BitWidth ? BitWidth : Length; - - // Handle out of bounds cases. - if (Length == 0 || Shift >= BitWidth) { - pushInteger(S, 0, Call->getType()); - return true; - } - - uint64_t Result = Val.getZExtValue() >> Shift; - Result &= llvm::maskTrailingOnes<uint64_t>(Length); - pushInteger(S, Result, Call->getType()); - return true; -} - -static bool interp__builtin_ia32_bzhi(InterpState &S, CodePtr OpPC, - const InterpFrame *Frame, - const CallExpr *Call) { - QualType CallType = Call->getType(); - if (Call->getNumArgs() != 2 || !Call->getArg(0)->getType()->isIntegerType() || - !Call->getArg(1)->getType()->isIntegerType() || - !CallType->isIntegerType()) - return false; - - APSInt Idx = popToAPSInt(S, Call->getArg(1)); - APSInt Val = popToAPSInt(S, Call->getArg(0)); - - unsigned BitWidth = Val.getBitWidth(); - uint64_t Index = Idx.extractBitsAsZExtValue(8, 0); - - if (Index < BitWidth) - Val.clearHighBits(BitWidth - Index); - - pushInteger(S, Val, CallType); - return true; -} - -static bool interp__builtin_ia32_pdep(InterpState &S, CodePtr OpPC, - const InterpFrame *Frame, - const CallExpr *Call) { - if (Call->getNumArgs() != 2 || !Call->getArg(0)->getType()->isIntegerType() || - !Call->getArg(1)->getType()->isIntegerType()) - return false; - - APSInt Mask = popToAPSInt(S, Call->getArg(1)); - APSInt Val = popToAPSInt(S, Call->getArg(0)); - - unsigned BitWidth = Val.getBitWidth(); - APInt Result = APInt::getZero(BitWidth); - for (unsigned I = 0, P = 0; I != BitWidth; ++I) { - if (Mask[I]) - Result.setBitVal(I, Val[P++]); - } - pushInteger(S, std::move(Result), Call->getType()); - return true; -} - -static bool interp__builtin_ia32_pext(InterpState &S, CodePtr OpPC, - const InterpFrame *Frame, - const CallExpr *Call) { - if (Call->getNumArgs() != 2 || !Call->getArg(0)->getType()->isIntegerType() || - !Call->getArg(1)->getType()->isIntegerType()) - return false; - - APSInt Mask = popToAPSInt(S, Call->getArg(1)); - APSInt Val = popToAPSInt(S, Call->getArg(0)); - - unsigned BitWidth = Val.getBitWidth(); - APInt Result = APInt::getZero(BitWidth); - for (unsigned I = 0, P = 0; I != BitWidth; ++I) { - if (Mask[I]) - Result.setBitVal(P++, Val[I]); - } - pushInteger(S, std::move(Result), Call->getType()); - return true; -} - /// (CarryIn, LHS, RHS, Result) static bool interp__builtin_ia32_addcarry_subborrow(InterpState &S, CodePtr OpPC, @@ -3275,11 +3186,37 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_bextr_u64: case clang::X86::BI__builtin_ia32_bextri_u32: case clang::X86::BI__builtin_ia32_bextri_u64: - return interp__builtin_ia32_bextr(S, OpPC, Frame, Call); + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &Val, const APSInt &Idx) { + unsigned BitWidth = Val.getBitWidth(); + uint64_t Shift = Idx.extractBitsAsZExtValue(8, 0); + uint64_t Length = Idx.extractBitsAsZExtValue(8, 8); + if (Length > BitWidth) { + Length = BitWidth; + } + + // Handle out of bounds cases. + if (Length == 0 || Shift >= BitWidth) + return APInt(BitWidth, 0); + + uint64_t Result = Val.getZExtValue() >> Shift; + Result &= llvm::maskTrailingOnes<uint64_t>(Length); + return APInt(BitWidth, Result); + }); case clang::X86::BI__builtin_ia32_bzhi_si: case clang::X86::BI__builtin_ia32_bzhi_di: - return interp__builtin_ia32_bzhi(S, OpPC, Frame, Call); + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &Val, const APSInt &Idx) { + unsigned BitWidth = Val.getBitWidth(); + uint64_t Index = Idx.extractBitsAsZExtValue(8, 0); + APSInt Result = Val; + + if (Index < BitWidth) + Result.clearHighBits(BitWidth - Index); + + return Result; + }); case clang::X86::BI__builtin_ia32_lzcnt_u16: case clang::X86::BI__builtin_ia32_lzcnt_u32: @@ -3299,11 +3236,33 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_pdep_si: case clang::X86::BI__builtin_ia32_pdep_di: - return interp__builtin_ia32_pdep(S, OpPC, Frame, Call); + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &Val, const APSInt &Mask) { + unsigned BitWidth = Val.getBitWidth(); + APInt Result = APInt::getZero(BitWidth); + + for (unsigned I = 0, P = 0; I != BitWidth; ++I) { + if (Mask[I]) + Result.setBitVal(I, Val[P++]); + } + + return Result; + }); case clang::X86::BI__builtin_ia32_pext_si: case clang::X86::BI__builtin_ia32_pext_di: - return interp__builtin_ia32_pext(S, OpPC, Frame, Call); + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &Val, const APSInt &Mask) { + unsigned BitWidth = Val.getBitWidth(); + APInt Result = APInt::getZero(BitWidth); + + for (unsigned I = 0, P = 0; I != BitWidth; ++I) { + if (Mask[I]) + Result.setBitVal(P++, Val[I]); + } + + return Result; + }); case clang::X86::BI__builtin_ia32_addcarryx_u32: case clang::X86::BI__builtin_ia32_addcarryx_u64: diff --git a/clang/lib/AST/ByteCode/Pointer.h b/clang/lib/AST/ByteCode/Pointer.h index af89b66..cd738ce 100644 --- a/clang/lib/AST/ByteCode/Pointer.h +++ b/clang/lib/AST/ByteCode/Pointer.h @@ -262,6 +262,7 @@ public: case Storage::Typeid: return false; } + llvm_unreachable("Unknown clang::interp::Storage enum"); } /// Checks if the pointer is live. bool isLive() const { diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 55b93e1..2ce4419 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -1024,6 +1024,26 @@ OMPPartialClause *OMPPartialClause::CreateEmpty(const ASTContext &C) { return new (C) OMPPartialClause(); } +OMPLoopRangeClause * +OMPLoopRangeClause::Create(const ASTContext &C, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation FirstLoc, + SourceLocation CountLoc, SourceLocation EndLoc, + Expr *First, Expr *Count) { + OMPLoopRangeClause *Clause = CreateEmpty(C); + Clause->setLocStart(StartLoc); + Clause->setLParenLoc(LParenLoc); + Clause->setFirstLoc(FirstLoc); + Clause->setCountLoc(CountLoc); + Clause->setLocEnd(EndLoc); + Clause->setFirst(First); + Clause->setCount(Count); + return Clause; +} + +OMPLoopRangeClause *OMPLoopRangeClause::CreateEmpty(const ASTContext &C) { + return new (C) OMPLoopRangeClause(); +} + OMPAllocateClause *OMPAllocateClause::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, Expr *Allocator, Expr *Alignment, SourceLocation ColonLoc, @@ -1964,6 +1984,21 @@ void OMPClausePrinter::VisitOMPPartialClause(OMPPartialClause *Node) { } } +void OMPClausePrinter::VisitOMPLoopRangeClause(OMPLoopRangeClause *Node) { + OS << "looprange"; + + Expr *First = Node->getFirst(); + Expr *Count = Node->getCount(); + + if (First && Count) { + OS << "("; + First->printPretty(OS, nullptr, Policy, 0); + OS << ","; + Count->printPretty(OS, nullptr, Policy, 0); + OS << ")"; + } +} + void OMPClausePrinter::VisitOMPAllocatorClause(OMPAllocatorClause *Node) { OS << "allocator("; Node->getAllocator()->printPretty(OS, nullptr, Policy, 0); diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index 1f6586f..a5b0cd3 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -125,13 +125,12 @@ OMPLoopBasedDirective::tryToFindNextInnerLoop(Stmt *CurStmt, bool OMPLoopBasedDirective::doForAllLoops( Stmt *CurStmt, bool TryImperfectlyNestedLoops, unsigned NumLoops, llvm::function_ref<bool(unsigned, Stmt *)> Callback, - llvm::function_ref<void(OMPCanonicalLoopNestTransformationDirective *)> + llvm::function_ref<void(OMPLoopTransformationDirective *)> OnTransformationCallback) { CurStmt = CurStmt->IgnoreContainers(); for (unsigned Cnt = 0; Cnt < NumLoops; ++Cnt) { while (true) { - auto *Dir = - dyn_cast<OMPCanonicalLoopNestTransformationDirective>(CurStmt); + auto *Dir = dyn_cast<OMPLoopTransformationDirective>(CurStmt); if (!Dir) break; @@ -371,6 +370,22 @@ OMPForDirective *OMPForDirective::Create( return Dir; } +Stmt *OMPLoopTransformationDirective::getTransformedStmt() const { + if (auto *D = dyn_cast<OMPCanonicalLoopNestTransformationDirective>(S)) + return D->getTransformedStmt(); + if (auto *D = dyn_cast<OMPCanonicalLoopSequenceTransformationDirective>(S)) + return D->getTransformedStmt(); + llvm_unreachable("unexpected object type"); +} + +Stmt *OMPLoopTransformationDirective::getPreInits() const { + if (auto *D = dyn_cast<OMPCanonicalLoopNestTransformationDirective>(S)) + return D->getPreInits(); + if (auto *D = dyn_cast<OMPCanonicalLoopSequenceTransformationDirective>(S)) + return D->getPreInits(); + llvm_unreachable("unexpected object type"); +} + Stmt *OMPCanonicalLoopNestTransformationDirective::getTransformedStmt() const { switch (getStmtClass()) { #define STMT(CLASS, PARENT) @@ -380,7 +395,7 @@ Stmt *OMPCanonicalLoopNestTransformationDirective::getTransformedStmt() const { return static_cast<const CLASS *>(this)->getTransformedStmt(); #include "clang/AST/StmtNodes.inc" default: - llvm_unreachable("Not a loop transformation"); + llvm_unreachable("Not a loop transformation for canonical loop nests"); } } @@ -393,7 +408,34 @@ Stmt *OMPCanonicalLoopNestTransformationDirective::getPreInits() const { return static_cast<const CLASS *>(this)->getPreInits(); #include "clang/AST/StmtNodes.inc" default: - llvm_unreachable("Not a loop transformation"); + llvm_unreachable("Not a loop transformation for canonical loop nests"); + } +} + +Stmt * +OMPCanonicalLoopSequenceTransformationDirective::getTransformedStmt() const { + switch (getStmtClass()) { +#define STMT(CLASS, PARENT) +#define ABSTRACT_STMT(CLASS) +#define OMPCANONICALLOOPSEQUENCETRANSFORMATIONDIRECTIVE(CLASS, PARENT) \ + case Stmt::CLASS##Class: \ + return static_cast<const CLASS *>(this)->getTransformedStmt(); +#include "clang/AST/StmtNodes.inc" + default: + llvm_unreachable("Not a loop transformation for canonical loop sequences"); + } +} + +Stmt *OMPCanonicalLoopSequenceTransformationDirective::getPreInits() const { + switch (getStmtClass()) { +#define STMT(CLASS, PARENT) +#define ABSTRACT_STMT(CLASS) +#define OMPCANONICALLOOPSEQUENCETRANSFORMATIONDIRECTIVE(CLASS, PARENT) \ + case Stmt::CLASS##Class: \ + return static_cast<const CLASS *>(this)->getPreInits(); +#include "clang/AST/StmtNodes.inc" + default: + llvm_unreachable("Not a loop transformation for canonical loop sequences"); } } @@ -510,6 +552,27 @@ OMPInterchangeDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses, SourceLocation(), SourceLocation(), NumLoops); } +OMPFuseDirective *OMPFuseDirective::Create( + const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + ArrayRef<OMPClause *> Clauses, unsigned NumGeneratedTopLevelLoops, + Stmt *AssociatedStmt, Stmt *TransformedStmt, Stmt *PreInits) { + + OMPFuseDirective *Dir = createDirective<OMPFuseDirective>( + C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc); + Dir->setTransformedStmt(TransformedStmt); + Dir->setPreInits(PreInits); + Dir->setNumGeneratedTopLevelLoops(NumGeneratedTopLevelLoops); + return Dir; +} + +OMPFuseDirective *OMPFuseDirective::CreateEmpty(const ASTContext &C, + unsigned NumClauses) { + OMPFuseDirective *Dir = createEmptyDirective<OMPFuseDirective>( + C, NumClauses, /*HasAssociatedStmt=*/true, TransformedStmtOffset + 1, + SourceLocation(), SourceLocation()); + return Dir; +} + OMPForSimdDirective * OMPForSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index 2c9c358..586c300 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -795,6 +795,11 @@ void StmtPrinter::VisitOMPInterchangeDirective(OMPInterchangeDirective *Node) { PrintOMPExecutableDirective(Node); } +void StmtPrinter::VisitOMPFuseDirective(OMPFuseDirective *Node) { + Indent() << "#pragma omp fuse"; + PrintOMPExecutableDirective(Node); +} + void StmtPrinter::VisitOMPForDirective(OMPForDirective *Node) { Indent() << "#pragma omp for"; PrintOMPExecutableDirective(Node); diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index 8b3af94..589a156 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -510,6 +510,13 @@ void OMPClauseProfiler::VisitOMPPartialClause(const OMPPartialClause *C) { Profiler->VisitExpr(Factor); } +void OMPClauseProfiler::VisitOMPLoopRangeClause(const OMPLoopRangeClause *C) { + if (const Expr *First = C->getFirst()) + Profiler->VisitExpr(First); + if (const Expr *Count = C->getCount()) + Profiler->VisitExpr(Count); +} + void OMPClauseProfiler::VisitOMPAllocatorClause(const OMPAllocatorClause *C) { if (C->getAllocator()) Profiler->VisitStmt(C->getAllocator()); @@ -1025,6 +1032,15 @@ void StmtProfiler::VisitOMPInterchangeDirective( VisitOMPCanonicalLoopNestTransformationDirective(S); } +void StmtProfiler::VisitOMPCanonicalLoopSequenceTransformationDirective( + const OMPCanonicalLoopSequenceTransformationDirective *S) { + VisitOMPExecutableDirective(S); +} + +void StmtProfiler::VisitOMPFuseDirective(const OMPFuseDirective *S) { + VisitOMPCanonicalLoopSequenceTransformationDirective(S); +} + void StmtProfiler::VisitOMPForDirective(const OMPForDirective *S) { VisitOMPLoopDirective(S); } diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 387026e..64b2bff 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -282,6 +282,7 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str, case OMPC_affinity: case OMPC_when: case OMPC_append_args: + case OMPC_looprange: break; default: break; @@ -627,6 +628,7 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind, case OMPC_affinity: case OMPC_when: case OMPC_append_args: + case OMPC_looprange: break; default: break; @@ -755,9 +757,14 @@ bool clang::isOpenMPCanonicalLoopNestTransformationDirective( DKind == OMPD_interchange || DKind == OMPD_stripe; } +bool clang::isOpenMPCanonicalLoopSequenceTransformationDirective( + OpenMPDirectiveKind DKind) { + return DKind == OMPD_fuse; +} + bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) { - // FIXME: There will be more cases when we implement 'fuse'. - return isOpenMPCanonicalLoopNestTransformationDirective(DKind); + return isOpenMPCanonicalLoopNestTransformationDirective(DKind) || + isOpenMPCanonicalLoopSequenceTransformationDirective(DKind); } bool clang::isOpenMPCombinedParallelADirective(OpenMPDirectiveKind DKind) { diff --git a/clang/lib/CIR/CodeGen/CIRGenClass.cpp b/clang/lib/CIR/CodeGen/CIRGenClass.cpp index cb8fe6c..9d12a13 100644 --- a/clang/lib/CIR/CodeGen/CIRGenClass.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenClass.cpp @@ -951,28 +951,37 @@ Address CIRGenFunction::getAddressOfBaseClass( bool nullCheckValue, SourceLocation loc) { assert(!path.empty() && "Base path should not be empty!"); + CastExpr::path_const_iterator start = path.begin(); + const CXXRecordDecl *vBase = nullptr; + if ((*path.begin())->isVirtual()) { - // The implementation here is actually complete, but let's flag this - // as an error until the rest of the virtual base class support is in place. - cgm.errorNYI(loc, "getAddrOfBaseClass: virtual base"); - return Address::invalid(); + vBase = (*start)->getType()->castAsCXXRecordDecl(); + ++start; } // Compute the static offset of the ultimate destination within its // allocating subobject (the virtual base, if there is one, or else // the "complete" object that we see). - CharUnits nonVirtualOffset = - cgm.computeNonVirtualBaseClassOffset(derived, path); + CharUnits nonVirtualOffset = cgm.computeNonVirtualBaseClassOffset( + vBase ? vBase : derived, {start, path.end()}); + + // If there's a virtual step, we can sometimes "devirtualize" it. + // For now, that's limited to when the derived type is final. + // TODO: "devirtualize" this for accesses to known-complete objects. + if (vBase && derived->hasAttr<FinalAttr>()) { + const ASTRecordLayout &layout = getContext().getASTRecordLayout(derived); + CharUnits vBaseOffset = layout.getVBaseClassOffset(vBase); + nonVirtualOffset += vBaseOffset; + vBase = nullptr; // we no longer have a virtual step + } // Get the base pointer type. mlir::Type baseValueTy = convertType((path.end()[-1])->getType()); assert(!cir::MissingFeatures::addressSpace()); - // The if statement here is redundant now, but it will be needed when we add - // support for virtual base classes. // If there is no virtual base, use cir.base_class_addr. It takes care of // the adjustment and the null pointer check. - if (nonVirtualOffset.isZero()) { + if (nonVirtualOffset.isZero() && !vBase) { assert(!cir::MissingFeatures::sanitizers()); return builder.createBaseClassAddr(getLoc(loc), value, baseValueTy, 0, /*assumeNotNull=*/true); @@ -980,10 +989,17 @@ Address CIRGenFunction::getAddressOfBaseClass( assert(!cir::MissingFeatures::sanitizers()); - // Apply the offset - value = builder.createBaseClassAddr(getLoc(loc), value, baseValueTy, - nonVirtualOffset.getQuantity(), - /*assumeNotNull=*/true); + // Compute the virtual offset. + mlir::Value virtualOffset = nullptr; + if (vBase) { + virtualOffset = cgm.getCXXABI().getVirtualBaseClassOffset( + getLoc(loc), *this, value, derived, vBase); + } + + // Apply both offsets. + value = applyNonVirtualAndVirtualOffset( + getLoc(loc), *this, value, nonVirtualOffset, virtualOffset, derived, + vBase, baseValueTy, not nullCheckValue); // Cast to the destination type. value = value.withElementType(builder, baseValueTy); diff --git a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp index 1f7e3dd..83208bf 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp @@ -210,6 +210,60 @@ RValue CIRGenFunction::emitCXXMemberOrOperatorCall( return emitCall(fnInfo, callee, returnValue, args, nullptr, loc); } +namespace { +/// The parameters to pass to a usual operator delete. +struct UsualDeleteParams { + TypeAwareAllocationMode typeAwareDelete = TypeAwareAllocationMode::No; + bool destroyingDelete = false; + bool size = false; + AlignedAllocationMode alignment = AlignedAllocationMode::No; +}; +} // namespace + +// FIXME(cir): this should be shared with LLVM codegen +static UsualDeleteParams getUsualDeleteParams(const FunctionDecl *fd) { + UsualDeleteParams params; + + const FunctionProtoType *fpt = fd->getType()->castAs<FunctionProtoType>(); + auto ai = fpt->param_type_begin(), ae = fpt->param_type_end(); + + if (fd->isTypeAwareOperatorNewOrDelete()) { + params.typeAwareDelete = TypeAwareAllocationMode::Yes; + assert(ai != ae); + ++ai; + } + + // The first argument after the type-identity parameter (if any) is + // always a void* (or C* for a destroying operator delete for class + // type C). + ++ai; + + // The next parameter may be a std::destroying_delete_t. + if (fd->isDestroyingOperatorDelete()) { + params.destroyingDelete = true; + assert(ai != ae); + ++ai; + } + + // Figure out what other parameters we should be implicitly passing. + if (ai != ae && (*ai)->isIntegerType()) { + params.size = true; + ++ai; + } else { + assert(!isTypeAwareAllocation(params.typeAwareDelete)); + } + + if (ai != ae && (*ai)->isAlignValT()) { + params.alignment = AlignedAllocationMode::Yes; + ++ai; + } else { + assert(!isTypeAwareAllocation(params.typeAwareDelete)); + } + + assert(ai == ae && "unexpected usual deallocation function parameter"); + return params; +} + static mlir::Value emitCXXNewAllocSize(CIRGenFunction &cgf, const CXXNewExpr *e, unsigned minElements, mlir::Value &numElements, @@ -332,6 +386,117 @@ static RValue emitNewDeleteCall(CIRGenFunction &cgf, return rv; } +namespace { +/// Calls the given 'operator delete' on a single object. +struct CallObjectDelete final : EHScopeStack::Cleanup { + mlir::Value ptr; + const FunctionDecl *operatorDelete; + QualType elementType; + + CallObjectDelete(mlir::Value ptr, const FunctionDecl *operatorDelete, + QualType elementType) + : ptr(ptr), operatorDelete(operatorDelete), elementType(elementType) {} + + void emit(CIRGenFunction &cgf) override { + cgf.emitDeleteCall(operatorDelete, ptr, elementType); + } + + // This is a placeholder until EHCleanupScope is implemented. + size_t getSize() const override { + assert(!cir::MissingFeatures::ehCleanupScope()); + return sizeof(CallObjectDelete); + } +}; +} // namespace + +/// Emit the code for deleting a single object. +static void emitObjectDelete(CIRGenFunction &cgf, const CXXDeleteExpr *de, + Address ptr, QualType elementType) { + // C++11 [expr.delete]p3: + // If the static type of the object to be deleted is different from its + // dynamic type, the static type shall be a base class of the dynamic type + // of the object to be deleted and the static type shall have a virtual + // destructor or the behavior is undefined. + assert(!cir::MissingFeatures::emitTypeCheck()); + + const FunctionDecl *operatorDelete = de->getOperatorDelete(); + assert(!operatorDelete->isDestroyingOperatorDelete()); + + // Find the destructor for the type, if applicable. If the + // destructor is virtual, we'll just emit the vcall and return. + const CXXDestructorDecl *dtor = nullptr; + if (const auto *rd = elementType->getAsCXXRecordDecl()) { + if (rd->hasDefinition() && !rd->hasTrivialDestructor()) { + dtor = rd->getDestructor(); + + if (dtor->isVirtual()) { + cgf.cgm.errorNYI(de->getSourceRange(), + "emitObjectDelete: virtual destructor"); + } + } + } + + // Make sure that we call delete even if the dtor throws. + // This doesn't have to a conditional cleanup because we're going + // to pop it off in a second. + cgf.ehStack.pushCleanup<CallObjectDelete>( + NormalAndEHCleanup, ptr.getPointer(), operatorDelete, elementType); + + if (dtor) { + cgf.emitCXXDestructorCall(dtor, Dtor_Complete, + /*ForVirtualBase=*/false, + /*Delegating=*/false, ptr, elementType); + } else if (elementType.getObjCLifetime()) { + assert(!cir::MissingFeatures::objCLifetime()); + cgf.cgm.errorNYI(de->getSourceRange(), "emitObjectDelete: ObjCLifetime"); + } + + // In traditional LLVM codegen null checks are emitted to save a delete call. + // In CIR we optimize for size by default, the null check should be added into + // this function callers. + assert(!cir::MissingFeatures::emitNullCheckForDeleteCalls()); + + cgf.popCleanupBlock(); +} + +void CIRGenFunction::emitCXXDeleteExpr(const CXXDeleteExpr *e) { + const Expr *arg = e->getArgument(); + Address ptr = emitPointerWithAlignment(arg); + + // Null check the pointer. + // + // We could avoid this null check if we can determine that the object + // destruction is trivial and doesn't require an array cookie; we can + // unconditionally perform the operator delete call in that case. For now, we + // assume that deleted pointers are null rarely enough that it's better to + // keep the branch. This might be worth revisiting for a -O0 code size win. + // + // CIR note: emit the code size friendly by default for now, such as mentioned + // in `emitObjectDelete`. + assert(!cir::MissingFeatures::emitNullCheckForDeleteCalls()); + QualType deleteTy = e->getDestroyedType(); + + // A destroying operator delete overrides the entire operation of the + // delete expression. + if (e->getOperatorDelete()->isDestroyingOperatorDelete()) { + cgm.errorNYI(e->getSourceRange(), + "emitCXXDeleteExpr: destroying operator delete"); + return; + } + + // We might be deleting a pointer to array. + deleteTy = getContext().getBaseElementType(deleteTy); + ptr = ptr.withElementType(builder, convertTypeForMem(deleteTy)); + + if (e->isArrayForm()) { + assert(!cir::MissingFeatures::deleteArray()); + cgm.errorNYI(e->getSourceRange(), "emitCXXDeleteExpr: array delete"); + return; + } else { + emitObjectDelete(*this, e, ptr, deleteTy); + } +} + mlir::Value CIRGenFunction::emitCXXNewExpr(const CXXNewExpr *e) { // The element type being allocated. QualType allocType = getContext().getBaseElementType(e->getAllocatedType()); @@ -443,3 +608,53 @@ mlir::Value CIRGenFunction::emitCXXNewExpr(const CXXNewExpr *e) { allocSizeWithoutCookie); return result.getPointer(); } + +void CIRGenFunction::emitDeleteCall(const FunctionDecl *deleteFD, + mlir::Value ptr, QualType deleteTy) { + assert(!cir::MissingFeatures::deleteArray()); + + const auto *deleteFTy = deleteFD->getType()->castAs<FunctionProtoType>(); + CallArgList deleteArgs; + + UsualDeleteParams params = getUsualDeleteParams(deleteFD); + auto paramTypeIt = deleteFTy->param_type_begin(); + + // Pass std::type_identity tag if present + if (isTypeAwareAllocation(params.typeAwareDelete)) + cgm.errorNYI(deleteFD->getSourceRange(), + "emitDeleteCall: type aware delete"); + + // Pass the pointer itself. + QualType argTy = *paramTypeIt++; + mlir::Value deletePtr = + builder.createBitcast(ptr.getLoc(), ptr, convertType(argTy)); + deleteArgs.add(RValue::get(deletePtr), argTy); + + // Pass the std::destroying_delete tag if present. + if (params.destroyingDelete) + cgm.errorNYI(deleteFD->getSourceRange(), + "emitDeleteCall: destroying delete"); + + // Pass the size if the delete function has a size_t parameter. + if (params.size) { + QualType sizeType = *paramTypeIt++; + CharUnits deleteTypeSize = getContext().getTypeSizeInChars(deleteTy); + assert(mlir::isa<cir::IntType>(convertType(sizeType)) && + "expected cir::IntType"); + cir::ConstantOp size = builder.getConstInt( + *currSrcLoc, convertType(sizeType), deleteTypeSize.getQuantity()); + + deleteArgs.add(RValue::get(size), sizeType); + } + + // Pass the alignment if the delete function has an align_val_t parameter. + if (isAlignedAllocation(params.alignment)) + cgm.errorNYI(deleteFD->getSourceRange(), + "emitDeleteCall: aligned allocation"); + + assert(paramTypeIt == deleteFTy->param_type_end() && + "unknown parameter to usual delete function"); + + // Emit the call to delete. + emitNewDeleteCall(*this, deleteFD, deleteFTy, deleteArgs); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index bd09d78..f4bbced 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -676,6 +676,10 @@ public: mlir::Value VisitRealImag(const UnaryOperator *e, QualType promotionType = QualType()); + mlir::Value VisitUnaryExtension(const UnaryOperator *e) { + return Visit(e->getSubExpr()); + } + mlir::Value VisitCXXDefaultInitExpr(CXXDefaultInitExpr *die) { CIRGenFunction::CXXDefaultInitExprScope scope(cgf, die); return Visit(die->getExpr()); @@ -687,6 +691,10 @@ public: mlir::Value VisitCXXNewExpr(const CXXNewExpr *e) { return cgf.emitCXXNewExpr(e); } + mlir::Value VisitCXXDeleteExpr(const CXXDeleteExpr *e) { + cgf.emitCXXDeleteExpr(e); + return {}; + } mlir::Value VisitCXXThrowExpr(const CXXThrowExpr *e) { cgf.emitCXXThrowExpr(e); @@ -1274,9 +1282,6 @@ mlir::Value ScalarExprEmitter::emitPromoted(const Expr *e, } else if (const auto *uo = dyn_cast<UnaryOperator>(e)) { switch (uo->getOpcode()) { case UO_Imag: - cgf.cgm.errorNYI(e->getSourceRange(), - "ScalarExprEmitter::emitPromoted unary imag"); - return {}; case UO_Real: return VisitRealImag(uo, promotionType); case UO_Minus: diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index 166435f..ef07db3 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -1197,6 +1197,8 @@ public: bool delegating, Address thisAddr, CallArgList &args, clang::SourceLocation loc); + void emitCXXDeleteExpr(const CXXDeleteExpr *e); + void emitCXXDestructorCall(const CXXDestructorDecl *dd, CXXDtorType type, bool forVirtualBase, bool delegating, Address thisAddr, QualType thisTy); @@ -1244,6 +1246,9 @@ public: void emitDelegatingCXXConstructorCall(const CXXConstructorDecl *ctor, const FunctionArgList &args); + void emitDeleteCall(const FunctionDecl *deleteFD, mlir::Value ptr, + QualType deleteTy); + mlir::LogicalResult emitDoStmt(const clang::DoStmt &s); /// Emit an expression as an initializer for an object (variable, field, etc.) diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index eef23a0..c977ff9 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -119,6 +119,19 @@ CIRGenModule::CIRGenModule(mlir::MLIRContext &mlirContext, cir::OptInfoAttr::get(&mlirContext, cgo.OptimizationLevel, cgo.OptimizeSize)); + // Set the module name to be the name of the main file. TranslationUnitDecl + // often contains invalid source locations and isn't a reliable source for the + // module location. + FileID mainFileId = astContext.getSourceManager().getMainFileID(); + const FileEntry &mainFile = + *astContext.getSourceManager().getFileEntryForID(mainFileId); + StringRef path = mainFile.tryGetRealPathName(); + if (!path.empty()) { + theModule.setSymName(path); + theModule->setLoc(mlir::FileLineColLoc::get(&mlirContext, path, + /*line=*/0, + /*column=*/0)); + } } CIRGenModule::~CIRGenModule() = default; diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp index e842892..644c383 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp @@ -216,6 +216,7 @@ mlir::LogicalResult CIRGenFunction::emitStmt(const Stmt *s, case Stmt::OMPSimdDirectiveClass: case Stmt::OMPTileDirectiveClass: case Stmt::OMPUnrollDirectiveClass: + case Stmt::OMPFuseDirectiveClass: case Stmt::OMPForDirectiveClass: case Stmt::OMPForSimdDirectiveClass: case Stmt::OMPSectionsDirectiveClass: diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index 58ef500..fb87036 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -1355,9 +1355,11 @@ mlir::LogicalResult cir::GlobalOp::verify() { return success(); } -void cir::GlobalOp::build(OpBuilder &odsBuilder, OperationState &odsState, - llvm::StringRef sym_name, mlir::Type sym_type, - bool isConstant, cir::GlobalLinkageKind linkage) { +void cir::GlobalOp::build( + OpBuilder &odsBuilder, OperationState &odsState, llvm::StringRef sym_name, + mlir::Type sym_type, bool isConstant, cir::GlobalLinkageKind linkage, + function_ref<void(OpBuilder &, Location)> ctorBuilder, + function_ref<void(OpBuilder &, Location)> dtorBuilder) { odsState.addAttribute(getSymNameAttrName(odsState.name), odsBuilder.getStringAttr(sym_name)); odsState.addAttribute(getSymTypeAttrName(odsState.name), @@ -1370,26 +1372,88 @@ void cir::GlobalOp::build(OpBuilder &odsBuilder, OperationState &odsState, cir::GlobalLinkageKindAttr::get(odsBuilder.getContext(), linkage); odsState.addAttribute(getLinkageAttrName(odsState.name), linkageAttr); + Region *ctorRegion = odsState.addRegion(); + if (ctorBuilder) { + odsBuilder.createBlock(ctorRegion); + ctorBuilder(odsBuilder, odsState.location); + } + + Region *dtorRegion = odsState.addRegion(); + if (dtorBuilder) { + odsBuilder.createBlock(dtorRegion); + dtorBuilder(odsBuilder, odsState.location); + } + odsState.addAttribute(getGlobalVisibilityAttrName(odsState.name), cir::VisibilityAttr::get(odsBuilder.getContext())); } +/// Given the region at `index`, or the parent operation if `index` is None, +/// return the successor regions. These are the regions that may be selected +/// during the flow of control. `operands` is a set of optional attributes that +/// correspond to a constant value for each operand, or null if that operand is +/// not a constant. +void cir::GlobalOp::getSuccessorRegions( + mlir::RegionBranchPoint point, SmallVectorImpl<RegionSuccessor> ®ions) { + // The `ctor` and `dtor` regions always branch back to the parent operation. + if (!point.isParent()) { + regions.push_back(RegionSuccessor()); + return; + } + + // Don't consider the ctor region if it is empty. + Region *ctorRegion = &this->getCtorRegion(); + if (ctorRegion->empty()) + ctorRegion = nullptr; + + // Don't consider the dtor region if it is empty. + Region *dtorRegion = &this->getCtorRegion(); + if (dtorRegion->empty()) + dtorRegion = nullptr; + + // If the condition isn't constant, both regions may be executed. + if (ctorRegion) + regions.push_back(RegionSuccessor(ctorRegion)); + if (dtorRegion) + regions.push_back(RegionSuccessor(dtorRegion)); +} + static void printGlobalOpTypeAndInitialValue(OpAsmPrinter &p, cir::GlobalOp op, - TypeAttr type, - Attribute initAttr) { + TypeAttr type, Attribute initAttr, + mlir::Region &ctorRegion, + mlir::Region &dtorRegion) { + auto printType = [&]() { p << ": " << type; }; if (!op.isDeclaration()) { p << "= "; - // This also prints the type... - if (initAttr) - printConstant(p, initAttr); + if (!ctorRegion.empty()) { + p << "ctor "; + printType(); + p << " "; + p.printRegion(ctorRegion, + /*printEntryBlockArgs=*/false, + /*printBlockTerminators=*/false); + } else { + // This also prints the type... + if (initAttr) + printConstant(p, initAttr); + } + + if (!dtorRegion.empty()) { + p << " dtor "; + p.printRegion(dtorRegion, + /*printEntryBlockArgs=*/false, + /*printBlockTerminators=*/false); + } } else { - p << ": " << type; + printType(); } } -static ParseResult -parseGlobalOpTypeAndInitialValue(OpAsmParser &parser, TypeAttr &typeAttr, - Attribute &initialValueAttr) { +static ParseResult parseGlobalOpTypeAndInitialValue(OpAsmParser &parser, + TypeAttr &typeAttr, + Attribute &initialValueAttr, + mlir::Region &ctorRegion, + mlir::Region &dtorRegion) { mlir::Type opTy; if (parser.parseOptionalEqual().failed()) { // Absence of equal means a declaration, so we need to parse the type. @@ -1397,16 +1461,38 @@ parseGlobalOpTypeAndInitialValue(OpAsmParser &parser, TypeAttr &typeAttr, if (parser.parseColonType(opTy)) return failure(); } else { - // Parse constant with initializer, examples: - // cir.global @y = #cir.fp<1.250000e+00> : !cir.double - // cir.global @rgb = #cir.const_array<[...] : !cir.array<i8 x 3>> - if (parseConstantValue(parser, initialValueAttr).failed()) - return failure(); + // Parse contructor, example: + // cir.global @rgb = ctor : type { ... } + if (!parser.parseOptionalKeyword("ctor")) { + if (parser.parseColonType(opTy)) + return failure(); + auto parseLoc = parser.getCurrentLocation(); + if (parser.parseRegion(ctorRegion, /*arguments=*/{}, /*argTypes=*/{})) + return failure(); + if (ensureRegionTerm(parser, ctorRegion, parseLoc).failed()) + return failure(); + } else { + // Parse constant with initializer, examples: + // cir.global @y = 3.400000e+00 : f32 + // cir.global @rgb = #cir.const_array<[...] : !cir.array<i8 x 3>> + if (parseConstantValue(parser, initialValueAttr).failed()) + return failure(); + + assert(mlir::isa<mlir::TypedAttr>(initialValueAttr) && + "Non-typed attrs shouldn't appear here."); + auto typedAttr = mlir::cast<mlir::TypedAttr>(initialValueAttr); + opTy = typedAttr.getType(); + } - assert(mlir::isa<mlir::TypedAttr>(initialValueAttr) && - "Non-typed attrs shouldn't appear here."); - auto typedAttr = mlir::cast<mlir::TypedAttr>(initialValueAttr); - opTy = typedAttr.getType(); + // Parse destructor, example: + // dtor { ... } + if (!parser.parseOptionalKeyword("dtor")) { + auto parseLoc = parser.getCurrentLocation(); + if (parser.parseRegion(dtorRegion, /*arguments=*/{}, /*argTypes=*/{})) + return failure(); + if (ensureRegionTerm(parser, dtorRegion, parseLoc).failed()) + return failure(); + } } typeAttr = TypeAttr::get(opTy); diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 57db20f7..64f1917 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1090,8 +1090,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline( if (std::optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts, LangOpts)) PB.registerPipelineStartEPCallback( - [Options](ModulePassManager &MPM, OptimizationLevel Level) { - MPM.addPass(GCOVProfilerPass(*Options)); + [this, Options](ModulePassManager &MPM, OptimizationLevel Level) { + MPM.addPass( + GCOVProfilerPass(*Options, CI.getVirtualFileSystemPtr())); }); if (std::optional<InstrProfOptions> Options = getInstrProfOptions(CodeGenOpts, LangOpts)) diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 12c7d48..fee6bc0 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -26,6 +26,7 @@ #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/Expr.h" +#include "clang/AST/LambdaCapture.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/VTableBuilder.h" @@ -1903,46 +1904,61 @@ CGDebugInfo::createInlinedSubprogram(StringRef FuncName, return SP; } +llvm::StringRef +CGDebugInfo::GetLambdaCaptureName(const LambdaCapture &Capture) { + if (Capture.capturesThis()) + return CGM.getCodeGenOpts().EmitCodeView ? "__this" : "this"; + + assert(Capture.capturesVariable()); + + const ValueDecl *CaptureDecl = Capture.getCapturedVar(); + assert(CaptureDecl && "Expected valid decl for captured variable."); + + return CaptureDecl->getName(); +} + void CGDebugInfo::CollectRecordLambdaFields( const CXXRecordDecl *CXXDecl, SmallVectorImpl<llvm::Metadata *> &elements, llvm::DIType *RecordTy) { // For C++11 Lambdas a Field will be the same as a Capture, but the Capture // has the name and the location of the variable so we should iterate over // both concurrently. - const ASTRecordLayout &layout = CGM.getContext().getASTRecordLayout(CXXDecl); RecordDecl::field_iterator Field = CXXDecl->field_begin(); unsigned fieldno = 0; for (CXXRecordDecl::capture_const_iterator I = CXXDecl->captures_begin(), E = CXXDecl->captures_end(); I != E; ++I, ++Field, ++fieldno) { - const LambdaCapture &C = *I; - if (C.capturesVariable()) { - SourceLocation Loc = C.getLocation(); - assert(!Field->isBitField() && "lambdas don't have bitfield members!"); - ValueDecl *V = C.getCapturedVar(); - StringRef VName = V->getName(); - llvm::DIFile *VUnit = getOrCreateFile(Loc); - auto Align = getDeclAlignIfRequired(V, CGM.getContext()); - llvm::DIType *FieldType = createFieldType( - VName, Field->getType(), Loc, Field->getAccess(), - layout.getFieldOffset(fieldno), Align, VUnit, RecordTy, CXXDecl); - elements.push_back(FieldType); - } else if (C.capturesThis()) { + const LambdaCapture &Capture = *I; + const uint64_t FieldOffset = + CGM.getContext().getASTRecordLayout(CXXDecl).getFieldOffset(fieldno); + + assert(!Field->isBitField() && "lambdas don't have bitfield members!"); + + SourceLocation Loc; + uint32_t Align = 0; + + if (Capture.capturesThis()) { // TODO: Need to handle 'this' in some way by probably renaming the // this of the lambda class and having a field member of 'this' or // by using AT_object_pointer for the function and having that be // used as 'this' for semantic references. - FieldDecl *f = *Field; - llvm::DIFile *VUnit = getOrCreateFile(f->getLocation()); - QualType type = f->getType(); - StringRef ThisName = - CGM.getCodeGenOpts().EmitCodeView ? "__this" : "this"; - llvm::DIType *fieldType = createFieldType( - ThisName, type, f->getLocation(), f->getAccess(), - layout.getFieldOffset(fieldno), VUnit, RecordTy, CXXDecl); - - elements.push_back(fieldType); + Loc = Field->getLocation(); + } else if (Capture.capturesVariable()) { + Loc = Capture.getLocation(); + + const ValueDecl *CaptureDecl = Capture.getCapturedVar(); + assert(CaptureDecl && "Expected valid decl for captured variable."); + + Align = getDeclAlignIfRequired(CaptureDecl, CGM.getContext()); + } else { + continue; } + + llvm::DIFile *VUnit = getOrCreateFile(Loc); + + elements.push_back(createFieldType( + GetLambdaCaptureName(Capture), Field->getType(), Loc, + Field->getAccess(), FieldOffset, Align, VUnit, RecordTy, CXXDecl)); } } diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h index f860773..78c3eb9 100644 --- a/clang/lib/CodeGen/CGDebugInfo.h +++ b/clang/lib/CodeGen/CGDebugInfo.h @@ -397,6 +397,7 @@ private: void CollectRecordFields(const RecordDecl *Decl, llvm::DIFile *F, SmallVectorImpl<llvm::Metadata *> &E, llvm::DICompositeType *RecordTy); + llvm::StringRef GetLambdaCaptureName(const LambdaCapture &Capture); /// If the C++ class has vtable info then insert appropriate debug /// info entry in EltTys vector. diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index e62bc76..92636f2 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -234,6 +234,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) { case Stmt::OMPInterchangeDirectiveClass: EmitOMPInterchangeDirective(cast<OMPInterchangeDirective>(*S)); break; + case Stmt::OMPFuseDirectiveClass: + EmitOMPFuseDirective(cast<OMPFuseDirective>(*S)); + break; case Stmt::OMPForDirectiveClass: EmitOMPForDirective(cast<OMPForDirective>(*S)); break; diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index ba9c7c6..efc06a2 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -201,6 +201,24 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { } else { llvm_unreachable("Unknown loop-based directive kind."); } + doEmitPreinits(PreInits); + PreCondVars.restore(CGF); + } + + void + emitPreInitStmt(CodeGenFunction &CGF, + const OMPCanonicalLoopSequenceTransformationDirective &S) { + const Stmt *PreInits; + if (const auto *Fuse = dyn_cast<OMPFuseDirective>(&S)) { + PreInits = Fuse->getPreInits(); + } else { + llvm_unreachable( + "Unknown canonical loop sequence transform directive kind."); + } + doEmitPreinits(PreInits); + } + + void doEmitPreinits(const Stmt *PreInits) { if (PreInits) { // CompoundStmts and DeclStmts are used as lists of PreInit statements and // declarations. Since declarations must be visible in the the following @@ -222,7 +240,6 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { CGF.EmitStmt(S); } } - PreCondVars.restore(CGF); } public: @@ -230,6 +247,11 @@ public: : CodeGenFunction::RunCleanupsScope(CGF) { emitPreInitStmt(CGF, S); } + OMPLoopScope(CodeGenFunction &CGF, + const OMPCanonicalLoopSequenceTransformationDirective &S) + : CodeGenFunction::RunCleanupsScope(CGF) { + emitPreInitStmt(CGF, S); + } }; class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope { @@ -1929,6 +1951,15 @@ public: CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP); CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI); } + if (const auto *Dir = + dyn_cast<OMPCanonicalLoopSequenceTransformationDirective>(S)) { + // For simplicity we reuse the loop scope similarly to what we do with + // OMPCanonicalLoopNestTransformationDirective do by being a subclass + // of OMPLoopBasedDirective. + Scope = new OMPLoopScope(CGF, *Dir); + CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP); + CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI); + } } ~OMPTransformDirectiveScopeRAII() { if (!Scope) @@ -1956,8 +1987,7 @@ static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop, return; } if (SimplifiedS == NextLoop) { - if (auto *Dir = - dyn_cast<OMPCanonicalLoopNestTransformationDirective>(SimplifiedS)) + if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(SimplifiedS)) SimplifiedS = Dir->getTransformedStmt(); if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS)) SimplifiedS = CanonLoop->getLoopStmt(); @@ -2952,6 +2982,12 @@ void CodeGenFunction::EmitOMPInterchangeDirective( EmitStmt(S.getTransformedStmt()); } +void CodeGenFunction::EmitOMPFuseDirective(const OMPFuseDirective &S) { + // Emit the de-sugared statement + OMPTransformDirectiveScopeRAII FuseScope(*this, &S); + EmitStmt(S.getTransformedStmt()); +} + void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) { bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 727487b..f0565c1 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3861,6 +3861,7 @@ public: void EmitOMPUnrollDirective(const OMPUnrollDirective &S); void EmitOMPReverseDirective(const OMPReverseDirective &S); void EmitOMPInterchangeDirective(const OMPInterchangeDirective &S); + void EmitOMPFuseDirective(const OMPFuseDirective &S); void EmitOMPForDirective(const OMPForDirective &S); void EmitOMPForSimdDirective(const OMPForSimdDirective &S); void EmitOMPScopeDirective(const OMPScopeDirective &S); diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index 07cf08c..6596ec0 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -192,9 +192,17 @@ static Value *emitFPIntBuiltin(CodeGenFunction &CGF, return CGF.Builder.CreateCall(F, {Src0, Src1}); } +static inline StringRef mapScopeToSPIRV(StringRef AMDGCNScope) { + if (AMDGCNScope == "agent") + return "device"; + if (AMDGCNScope == "wavefront") + return "subgroup"; + return AMDGCNScope; +} + // For processing memory ordering and memory scope arguments of various // amdgcn builtins. -// \p Order takes a C++11 comptabile memory-ordering specifier and converts +// \p Order takes a C++11 compatible memory-ordering specifier and converts // it into LLVM's memory ordering specifier using atomic C ABI, and writes // to \p AO. \p Scope takes a const char * and converts it into AMDGCN // specific SyncScopeID and writes it to \p SSID. @@ -227,6 +235,8 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope, // Some of the atomic builtins take the scope as a string name. StringRef scp; if (llvm::getConstantStringInfo(Scope, scp)) { + if (getTarget().getTriple().isSPIRV()) + scp = mapScopeToSPIRV(scp); SSID = getLLVMContext().getOrInsertSyncScopeID(scp); return; } @@ -238,13 +248,19 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope, SSID = llvm::SyncScope::System; break; case 1: // __MEMORY_SCOPE_DEVICE - SSID = getLLVMContext().getOrInsertSyncScopeID("agent"); + if (getTarget().getTriple().isSPIRV()) + SSID = getLLVMContext().getOrInsertSyncScopeID("device"); + else + SSID = getLLVMContext().getOrInsertSyncScopeID("agent"); break; case 2: // __MEMORY_SCOPE_WRKGRP SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup"); break; case 3: // __MEMORY_SCOPE_WVFRNT - SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront"); + if (getTarget().getTriple().isSPIRV()) + SSID = getLLVMContext().getOrInsertSyncScopeID("subgroup"); + else + SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront"); break; case 4: // __MEMORY_SCOPE_SINGLE SSID = llvm::SyncScope::SingleThread; @@ -1510,7 +1526,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, // // The global/flat cases need to use agent scope to consistently produce // the native instruction instead of a cmpxchg expansion. - SSID = getLLVMContext().getOrInsertSyncScopeID("agent"); + if (getTarget().getTriple().isSPIRV()) + SSID = getLLVMContext().getOrInsertSyncScopeID("device"); + else + SSID = getLLVMContext().getOrInsertSyncScopeID("agent"); AO = AtomicOrdering::Monotonic; // The v2bf16 builtin uses i16 instead of a natural bfloat type. diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index f110dba..85a13357 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -6613,6 +6613,9 @@ std::string Driver::GetStdModuleManifestPath(const Compilation &C, const ToolChain &TC) const { std::string error = "<NOT PRESENT>"; + if (C.getArgs().hasArg(options::OPT_nostdlib)) + return error; + switch (TC.GetCXXStdlibType(C.getArgs())) { case ToolChain::CST_Libcxx: { auto evaluate = [&](const char *library) -> std::optional<std::string> { diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index e04b0e7..a28446a 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -55,7 +55,7 @@ namespace format { TYPE(ConflictAlternative) \ TYPE(ConflictEnd) \ TYPE(ConflictStart) \ - /* l_brace of if/for/while */ \ + /* l_brace of if/for/while/switch/catch */ \ TYPE(ControlStatementLBrace) \ TYPE(ControlStatementRBrace) \ TYPE(CppCastLParen) \ diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 67066a1..0c9c88a 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -4021,29 +4021,28 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) const { } } - if (IsCpp && - (LineIsFunctionDeclaration || - (FirstNonComment && FirstNonComment->is(TT_CtorDtorDeclName))) && - Line.endsWith(tok::semi, tok::r_brace)) { - auto *Tok = Line.Last->Previous; - while (Tok->isNot(tok::r_brace)) - Tok = Tok->Previous; - if (auto *LBrace = Tok->MatchingParen; LBrace && LBrace->is(TT_Unknown)) { - assert(LBrace->is(tok::l_brace)); - Tok->setBlockKind(BK_Block); - LBrace->setBlockKind(BK_Block); - LBrace->setFinalizedType(TT_FunctionLBrace); + if (IsCpp) { + if ((LineIsFunctionDeclaration || + (FirstNonComment && FirstNonComment->is(TT_CtorDtorDeclName))) && + Line.endsWith(tok::semi, tok::r_brace)) { + auto *Tok = Line.Last->Previous; + while (Tok->isNot(tok::r_brace)) + Tok = Tok->Previous; + if (auto *LBrace = Tok->MatchingParen; LBrace && LBrace->is(TT_Unknown)) { + assert(LBrace->is(tok::l_brace)); + Tok->setBlockKind(BK_Block); + LBrace->setBlockKind(BK_Block); + LBrace->setFinalizedType(TT_FunctionLBrace); + } } - } - if (IsCpp && SeenName && AfterLastAttribute && - mustBreakAfterAttributes(*AfterLastAttribute, Style)) { - AfterLastAttribute->MustBreakBefore = true; - if (LineIsFunctionDeclaration) - Line.ReturnTypeWrapped = true; - } + if (SeenName && AfterLastAttribute && + mustBreakAfterAttributes(*AfterLastAttribute, Style)) { + AfterLastAttribute->MustBreakBefore = true; + if (LineIsFunctionDeclaration) + Line.ReturnTypeWrapped = true; + } - if (IsCpp) { if (!LineIsFunctionDeclaration) { // Annotate */&/&& in `operator` function calls as binary operators. for (const auto *Tok = FirstNonComment; Tok; Tok = Tok->Next) { @@ -4089,6 +4088,11 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) const { } } + if (First->is(TT_ElseLBrace)) { + First->CanBreakBefore = true; + First->MustBreakBefore = true; + } + bool InFunctionDecl = Line.MightBeFunctionDecl; bool InParameterList = false; for (auto *Current = First->Next; Current; Current = Current->Next) { diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp index 7424958..d7d56b8 100644 --- a/clang/lib/Frontend/FrontendActions.cpp +++ b/clang/lib/Frontend/FrontendActions.cpp @@ -971,14 +971,17 @@ void DumpModuleInfoAction::ExecuteAction() { // Emit the macro definitions in the module file so that we can know how // much definitions in the module file quickly. // TODO: Emit the macro definition bodies completely. - if (auto FilteredMacros = llvm::make_filter_range( - R->getPreprocessor().macros(), - [](const auto &Macro) { return Macro.first->isFromAST(); }); - !FilteredMacros.empty()) { - Out << " Macro Definitions:\n"; - for (/*<IdentifierInfo *, MacroState> pair*/ const auto &Macro : - FilteredMacros) - Out << " " << Macro.first->getName() << "\n"; + { + std::vector<StringRef> MacroNames; + for (const auto &M : R->getPreprocessor().macros()) { + if (M.first->isFromAST()) + MacroNames.push_back(M.first->getName()); + } + llvm::sort(MacroNames); + if (!MacroNames.empty()) + Out << " Macro Definitions:\n"; + for (StringRef Name : MacroNames) + Out << " " << Name << "\n"; } // Now let's print out any modules we did not see as part of the Primary. diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index edf0a09..877ab02 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -742,7 +742,10 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts, Builder.defineMacro("__cpp_impl_coroutine", "201902L"); Builder.defineMacro("__cpp_designated_initializers", "201707L"); Builder.defineMacro("__cpp_impl_three_way_comparison", "201907L"); - //Builder.defineMacro("__cpp_modules", "201907L"); + // Intentionally to set __cpp_modules to 1. + // See https://github.com/llvm/llvm-project/issues/71364 for details. + // Builder.defineMacro("__cpp_modules", "201907L"); + Builder.defineMacro("__cpp_modules", "1"); Builder.defineMacro("__cpp_using_enum", "201907L"); } // C++23 features. diff --git a/clang/lib/Frontend/ModuleDependencyCollector.cpp b/clang/lib/Frontend/ModuleDependencyCollector.cpp index 3b363f9..ff37065 100644 --- a/clang/lib/Frontend/ModuleDependencyCollector.cpp +++ b/clang/lib/Frontend/ModuleDependencyCollector.cpp @@ -91,10 +91,10 @@ void ModuleDependencyCollector::attachToPreprocessor(Preprocessor &PP) { std::make_unique<ModuleDependencyMMCallbacks>(*this)); } -static bool isCaseSensitivePath(StringRef Path) { +static bool isCaseSensitivePath(llvm::vfs::FileSystem &VFS, StringRef Path) { SmallString<256> TmpDest = Path, UpperDest, RealDest; // Remove component traversals, links, etc. - if (llvm::sys::fs::real_path(Path, TmpDest)) + if (VFS.getRealPath(Path, TmpDest)) return true; // Current default value in vfs.yaml Path = TmpDest; @@ -104,7 +104,7 @@ static bool isCaseSensitivePath(StringRef Path) { // already expects when sensitivity isn't setup. for (auto &C : Path) UpperDest.push_back(toUppercase(C)); - if (!llvm::sys::fs::real_path(UpperDest, RealDest) && Path == RealDest) + if (!VFS.getRealPath(UpperDest, RealDest) && Path == RealDest) return false; return true; } @@ -121,7 +121,8 @@ void ModuleDependencyCollector::writeFileMap() { // Explicitly set case sensitivity for the YAML writer. For that, find out // the sensitivity at the path where the headers all collected to. - VFSWriter.setCaseSensitivity(isCaseSensitivePath(VFSDir)); + VFSWriter.setCaseSensitivity( + isCaseSensitivePath(Canonicalizer.getFileSystem(), VFSDir)); // Do not rely on real path names when executing the crash reproducer scripts // since we only want to actually use the files we have on the VFS cache. @@ -153,7 +154,7 @@ std::error_code ModuleDependencyCollector::copyToRoot(StringRef Src, } else { // When collecting entries from input vfsoverlays, copy the external // contents into the cache but still map from the source. - if (!fs::exists(Dst)) + if (!Canonicalizer.getFileSystem().exists(Dst)) return std::error_code(); path::append(CacheDst, Dst); Paths.CopyFrom = Dst; diff --git a/clang/lib/Headers/avx10_2_512niintrin.h b/clang/lib/Headers/avx10_2_512niintrin.h index 67679fc..fdb57c7 100644 --- a/clang/lib/Headers/avx10_2_512niintrin.h +++ b/clang/lib/Headers/avx10_2_512niintrin.h @@ -64,8 +64,8 @@ static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_dpph_ps(__mmask16 __U, static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbssd_epi32(__m512i __W, __m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_vpdpbssd512((__v16si)__W, (__v16si)__A, - (__v16si)__B); + return (__m512i)__builtin_ia32_vpdpbssd512((__v16si)__W, (__v64qi)__A, + (__v64qi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -84,8 +84,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbssd_epi32( static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbssds_epi32(__m512i __W, __m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_vpdpbssds512((__v16si)__W, (__v16si)__A, - (__v16si)__B); + return (__m512i)__builtin_ia32_vpdpbssds512((__v16si)__W, (__v64qi)__A, + (__v64qi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbssds_epi32( @@ -104,8 +104,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbssds_epi32( static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbsud_epi32(__m512i __W, __m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_vpdpbsud512((__v16si)__W, (__v16si)__A, - (__v16si)__B); + return (__m512i)__builtin_ia32_vpdpbsud512((__v16si)__W, (__v64qi)__A, + (__v64qu)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -124,8 +124,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbsud_epi32( static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbsuds_epi32(__m512i __W, __m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_vpdpbsuds512((__v16si)__W, (__v16si)__A, - (__v16si)__B); + return (__m512i)__builtin_ia32_vpdpbsuds512((__v16si)__W, (__v64qi)__A, + (__v64qu)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbsuds_epi32( @@ -144,8 +144,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbsuds_epi32( static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbuud_epi32(__m512i __W, __m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_vpdpbuud512((__v16si)__W, (__v16si)__A, - (__v16si)__B); + return (__m512i)__builtin_ia32_vpdpbuud512((__v16si)__W, (__v64qu)__A, + (__v64qu)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -164,8 +164,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbuud_epi32( static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbuuds_epi32(__m512i __W, __m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_vpdpbuuds512((__v16si)__W, (__v16si)__A, - (__v16si)__B); + return (__m512i)__builtin_ia32_vpdpbuuds512((__v16si)__W, (__v64qu)__A, + (__v64qu)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbuuds_epi32( diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index a7f7099..d6ba19a 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -2311,10 +2311,9 @@ _mm256_cvttps_epi32(__m256 __a) /// \param __a /// A 256-bit vector of [4 x double]. /// \returns A 64 bit double containing the first element of the input vector. -static __inline double __DEFAULT_FN_ATTRS -_mm256_cvtsd_f64(__m256d __a) -{ - return __a[0]; +static __inline double __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_cvtsd_f64(__m256d __a) { + return __a[0]; } /// Returns the first element of the input vector of [8 x i32]. @@ -2327,11 +2326,10 @@ _mm256_cvtsd_f64(__m256d __a) /// \param __a /// A 256-bit vector of [8 x i32]. /// \returns A 32 bit integer containing the first element of the input vector. -static __inline int __DEFAULT_FN_ATTRS -_mm256_cvtsi256_si32(__m256i __a) -{ - __v8si __b = (__v8si)__a; - return __b[0]; +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_cvtsi256_si32(__m256i __a) { + __v8si __b = (__v8si)__a; + return __b[0]; } /// Returns the first element of the input vector of [8 x float]. @@ -2344,10 +2342,9 @@ _mm256_cvtsi256_si32(__m256i __a) /// \param __a /// A 256-bit vector of [8 x float]. /// \returns A 32 bit float containing the first element of the input vector. -static __inline float __DEFAULT_FN_ATTRS -_mm256_cvtss_f32(__m256 __a) -{ - return __a[0]; +static __inline float __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_cvtss_f32(__m256 __a) { + return __a[0]; } /* Vector replicate */ diff --git a/clang/lib/Headers/avxvnniint8intrin.h b/clang/lib/Headers/avxvnniint8intrin.h index c211620..858b66b 100644 --- a/clang/lib/Headers/avxvnniint8intrin.h +++ b/clang/lib/Headers/avxvnniint8intrin.h @@ -14,6 +14,7 @@ #ifndef __AVXVNNIINT8INTRIN_H #define __AVXVNNIINT8INTRIN_H +// clang-format off /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding @@ -44,10 +45,12 @@ /// ENDFOR /// dst[MAX:128] := 0 /// \endcode +// clang-format on #define _mm_dpbssd_epi32(__W, __A, __B) \ - ((__m128i)__builtin_ia32_vpdpbssd128((__v4si)(__W), (__v4si)(__A), \ - (__v4si)(__B))) + ((__m128i)__builtin_ia32_vpdpbssd128((__v4si)(__W), (__v16qi)(__A), \ + (__v16qi)(__B))) +// clang-format off /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding @@ -78,10 +81,12 @@ /// ENDFOR /// dst[MAX:256] := 0 /// \endcode +// clang-format on #define _mm256_dpbssd_epi32(__W, __A, __B) \ - ((__m256i)__builtin_ia32_vpdpbssd256((__v8si)(__W), (__v8si)(__A), \ - (__v8si)(__B))) + ((__m256i)__builtin_ia32_vpdpbssd256((__v8si)(__W), (__v32qi)(__A), \ + (__v32qi)(__B))) +// clang-format off /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding @@ -94,7 +99,7 @@ /// _mm_dpbssds_epi32( __m128i __W, __m128i __A, __m128i __B); /// \endcode /// -/// This intrinsic corresponds to the \c VPDPBSSD instruction. +/// This intrinsic corresponds to the \c VPDPBSSDS instruction. /// /// \param __A /// A 128-bit vector of [16 x char]. @@ -113,10 +118,12 @@ /// ENDFOR /// dst[MAX:128] := 0 /// \endcode +// clang-format on #define _mm_dpbssds_epi32(__W, __A, __B) \ - ((__m128i)__builtin_ia32_vpdpbssds128((__v4si)(__W), (__v4si)(__A), \ - (__v4si)(__B))) + ((__m128i)__builtin_ia32_vpdpbssds128((__v4si)(__W), (__v16qi)(__A), \ + (__v16qi)(__B))) +// clang-format off /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding @@ -129,7 +136,7 @@ /// _mm256_dpbssds_epi32(__m256i __W, __m256i __A, __m256i __B); /// \endcode /// -/// This intrinsic corresponds to the \c VPDPBSSD instruction. +/// This intrinsic corresponds to the \c VPDPBSSDS instruction. /// /// \param __A /// A 256-bit vector of [32 x char]. @@ -148,10 +155,12 @@ /// ENDFOR /// dst[MAX:256] := 0 /// \endcode +// clang-format on #define _mm256_dpbssds_epi32(__W, __A, __B) \ - ((__m256i)__builtin_ia32_vpdpbssds256((__v8si)(__W), (__v8si)(__A), \ - (__v8si)(__B))) + ((__m256i)__builtin_ia32_vpdpbssds256((__v8si)(__W), (__v32qi)(__A), \ + (__v32qi)(__B))) +// clang-format off /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding @@ -163,7 +172,7 @@ /// _mm_dpbsud_epi32(__m128i __W, __m128i __A, __m128i __B); /// \endcode /// -/// This intrinsic corresponds to the \c VPDPBSSD instruction. +/// This intrinsic corresponds to the \c VPDPBSUD instruction. /// /// \param __A /// A 128-bit vector of [16 x char]. @@ -182,10 +191,12 @@ /// ENDFOR /// dst[MAX:128] := 0 /// \endcode +// clang-format on #define _mm_dpbsud_epi32(__W, __A, __B) \ - ((__m128i)__builtin_ia32_vpdpbsud128((__v4si)(__W), (__v4si)(__A), \ - (__v4si)(__B))) + ((__m128i)__builtin_ia32_vpdpbsud128((__v4si)(__W), (__v16qi)(__A), \ + (__v16qu)(__B))) +// clang-format off /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding @@ -197,7 +208,7 @@ /// _mm256_dpbsud_epi32(__m256i __W, __m256i __A, __m256i __B); /// \endcode /// -/// This intrinsic corresponds to the \c VPDPBSSD instruction. +/// This intrinsic corresponds to the \c VPDPBSUD instruction. /// /// \param __A /// A 256-bit vector of [32 x char]. @@ -216,10 +227,12 @@ /// ENDFOR /// dst[MAX:256] := 0 /// \endcode +// clang-format on #define _mm256_dpbsud_epi32(__W, __A, __B) \ - ((__m256i)__builtin_ia32_vpdpbsud256((__v8si)(__W), (__v8si)(__A), \ - (__v8si)(__B))) + ((__m256i)__builtin_ia32_vpdpbsud256((__v8si)(__W), (__v32qi)(__A), \ + (__v32qu)(__B))) +// clang-format off /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding @@ -232,7 +245,7 @@ /// _mm_dpbsuds_epi32( __m128i __W, __m128i __A, __m128i __B); /// \endcode /// -/// This intrinsic corresponds to the \c VPDPBSSD instruction. +/// This intrinsic corresponds to the \c VPDPBSUDS instruction. /// /// \param __A /// A 128-bit vector of [16 x char]. @@ -251,10 +264,12 @@ /// ENDFOR /// dst[MAX:128] := 0 /// \endcode +// clang-format on #define _mm_dpbsuds_epi32(__W, __A, __B) \ - ((__m128i)__builtin_ia32_vpdpbsuds128((__v4si)(__W), (__v4si)(__A), \ - (__v4si)(__B))) + ((__m128i)__builtin_ia32_vpdpbsuds128((__v4si)(__W), (__v16qi)(__A), \ + (__v16qu)(__B))) +// clang-format off /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding @@ -267,7 +282,7 @@ /// _mm256_dpbsuds_epi32(__m256i __W, __m256i __A, __m256i __B); /// \endcode /// -/// This intrinsic corresponds to the \c VPDPBSSD instruction. +/// This intrinsic corresponds to the \c VPDPBSUDS instruction. /// /// \param __A /// A 256-bit vector of [32 x char]. @@ -286,10 +301,12 @@ /// ENDFOR /// dst[MAX:256] := 0 /// \endcode +// clang-format on #define _mm256_dpbsuds_epi32(__W, __A, __B) \ - ((__m256i)__builtin_ia32_vpdpbsuds256((__v8si)(__W), (__v8si)(__A), \ - (__v8si)(__B))) + ((__m256i)__builtin_ia32_vpdpbsuds256((__v8si)(__W), (__v32qi)(__A), \ + (__v32qu)(__B))) +// clang-format off /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding @@ -301,7 +318,7 @@ /// _mm_dpbuud_epi32(__m128i __W, __m128i __A, __m128i __B); /// \endcode /// -/// This intrinsic corresponds to the \c VPDPBSSD instruction. +/// This intrinsic corresponds to the \c VPDPBUUD instruction. /// /// \param __A /// A 128-bit vector of [16 x unsigned char]. @@ -320,10 +337,12 @@ /// ENDFOR /// dst[MAX:128] := 0 /// \endcode +// clang-format on #define _mm_dpbuud_epi32(__W, __A, __B) \ - ((__m128i)__builtin_ia32_vpdpbuud128((__v4si)(__W), (__v4si)(__A), \ - (__v4si)(__B))) + ((__m128i)__builtin_ia32_vpdpbuud128((__v4si)(__W), (__v16qu)(__A), \ + (__v16qu)(__B))) +// clang-format off /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding @@ -335,7 +354,7 @@ /// _mm256_dpbuud_epi32(__m256i __W, __m256i __A, __m256i __B); /// \endcode /// -/// This intrinsic corresponds to the \c VPDPBSSD instruction. +/// This intrinsic corresponds to the \c VPDPBUUD instruction. /// /// \param __A /// A 256-bit vector of [32 x unsigned char]. @@ -354,10 +373,12 @@ /// ENDFOR /// dst[MAX:256] := 0 /// \endcode +// clang-format on #define _mm256_dpbuud_epi32(__W, __A, __B) \ - ((__m256i)__builtin_ia32_vpdpbuud256((__v8si)(__W), (__v8si)(__A), \ - (__v8si)(__B))) + ((__m256i)__builtin_ia32_vpdpbuud256((__v8si)(__W), (__v32qu)(__A), \ + (__v32qu)(__B))) +// clang-format off /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding @@ -389,10 +410,12 @@ /// ENDFOR /// dst[MAX:128] := 0 /// \endcode +// clang-format on #define _mm_dpbuuds_epi32(__W, __A, __B) \ - ((__m128i)__builtin_ia32_vpdpbuuds128((__v4si)(__W), (__v4si)(__A), \ - (__v4si)(__B))) + ((__m128i)__builtin_ia32_vpdpbuuds128((__v4si)(__W), (__v16qu)(__A), \ + (__v16qu)(__B))) +// clang-format off /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding /// 32-bit integer in \a __W with signed saturation, and store the packed @@ -423,8 +446,9 @@ /// ENDFOR /// dst[MAX:256] := 0 /// \endcode +// clang-format on #define _mm256_dpbuuds_epi32(__W, __A, __B) \ - ((__m256i)__builtin_ia32_vpdpbuuds256((__v8si)(__W), (__v8si)(__A), \ - (__v8si)(__B))) + ((__m256i)__builtin_ia32_vpdpbuuds256((__v8si)(__W), (__v32qu)(__A), \ + (__v32qu)(__B))) #endif // __AVXVNNIINT8INTRIN_H diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp index 8605ba2..a2c6957 100644 --- a/clang/lib/Parse/ParseExprCXX.cpp +++ b/clang/lib/Parse/ParseExprCXX.cpp @@ -1299,7 +1299,7 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer( Diag(Tok, getLangOpts().CPlusPlus23 ? diag::warn_cxx20_compat_decl_attrs_on_lambda : diag::ext_decl_attrs_on_lambda) - << Tok.getIdentifierInfo() << Tok.isRegularKeywordAttribute(); + << Tok.isRegularKeywordAttribute() << Tok.getIdentifierInfo(); MaybeParseCXX11Attributes(D); } diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 02f3f10..04f29c8 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -2968,6 +2968,39 @@ OMPClause *Parser::ParseOpenMPSizesClause() { OpenLoc, CloseLoc); } +OMPClause *Parser::ParseOpenMPLoopRangeClause() { + SourceLocation ClauseNameLoc = ConsumeToken(); + SourceLocation FirstLoc, CountLoc; + + BalancedDelimiterTracker T(*this, tok::l_paren, tok::annot_pragma_openmp_end); + if (T.consumeOpen()) { + Diag(Tok, diag::err_expected) << tok::l_paren; + return nullptr; + } + + FirstLoc = Tok.getLocation(); + ExprResult FirstVal = ParseConstantExpression(); + if (!FirstVal.isUsable()) { + T.skipToEnd(); + return nullptr; + } + + ExpectAndConsume(tok::comma); + + CountLoc = Tok.getLocation(); + ExprResult CountVal = ParseConstantExpression(); + if (!CountVal.isUsable()) { + T.skipToEnd(); + return nullptr; + } + + T.consumeClose(); + + return Actions.OpenMP().ActOnOpenMPLoopRangeClause( + FirstVal.get(), CountVal.get(), ClauseNameLoc, T.getOpenLocation(), + FirstLoc, CountLoc, T.getCloseLocation()); +} + OMPClause *Parser::ParseOpenMPPermutationClause() { SourceLocation ClauseNameLoc, OpenLoc, CloseLoc; SmallVector<Expr *> ArgExprs; @@ -3473,6 +3506,9 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, } Clause = ParseOpenMPClause(CKind, WrongDirective); break; + case OMPC_looprange: + Clause = ParseOpenMPLoopRangeClause(); + break; default: break; } diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 1b66d83..8606227 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -983,10 +983,9 @@ static void DiagUninitUse(Sema &S, const VarDecl *VD, const UninitUse &Use, case UninitUse::AfterDecl: case UninitUse::AfterCall: S.Diag(VD->getLocation(), diag::warn_sometimes_uninit_var) - << VD->getDeclName() << IsCapturedByBlock - << (Use.getKind() == UninitUse::AfterDecl ? 4 : 5) - << const_cast<DeclContext*>(VD->getLexicalDeclContext()) - << VD->getSourceRange(); + << VD->getDeclName() << IsCapturedByBlock + << (Use.getKind() == UninitUse::AfterDecl ? 4 : 5) + << VD->getLexicalDeclContext() << VD->getSourceRange(); S.Diag(Use.getUser()->getBeginLoc(), diag::note_uninit_var_use) << IsCapturedByBlock << Use.getUser()->getSourceRange(); return; diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp index d238b79..dc6d232 100644 --- a/clang/lib/Sema/SemaConcept.cpp +++ b/clang/lib/Sema/SemaConcept.cpp @@ -193,7 +193,7 @@ DiagRecursiveConstraintEval(Sema &S, llvm::FoldingSetNodeID &ID, // Sema::InstantiatingTemplate::isAlreadyBeingInstantiated function. if (S.SatisfactionStackContains(Templ, ID)) { S.Diag(E->getExprLoc(), diag::err_constraint_depends_on_self) - << const_cast<Expr *>(E) << E->getSourceRange(); + << E << E->getSourceRange(); return true; } diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 9ef7a26..0069b08 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -18909,8 +18909,7 @@ ExprResult Sema::VerifyBitField(SourceLocation FieldLoc, // 'bool'. if (BitfieldIsOverwide && !FieldTy->isBooleanType() && FieldName) { Diag(FieldLoc, diag::warn_bitfield_width_exceeds_type_width) - << FieldName << toString(Value, 10) - << (unsigned)TypeWidth; + << FieldName << Value << (unsigned)TypeWidth; } } diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp index 552c929..a0483c3 100644 --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp @@ -1493,6 +1493,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) { case Stmt::OMPUnrollDirectiveClass: case Stmt::OMPReverseDirectiveClass: case Stmt::OMPInterchangeDirectiveClass: + case Stmt::OMPFuseDirectiveClass: case Stmt::OMPSingleDirectiveClass: case Stmt::OMPTargetDataDirectiveClass: case Stmt::OMPTargetDirectiveClass: diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 3b267c1..06b2529 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -16791,12 +16791,11 @@ ExprResult Sema::BuildVAArgExpr(SourceLocation BuiltinLoc, Expr *OrigExpr = E; bool IsMS = false; - // CUDA device code does not support varargs. + // CUDA device global function does not support varargs. if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { if (const FunctionDecl *F = dyn_cast<FunctionDecl>(CurContext)) { CUDAFunctionTarget T = CUDA().IdentifyTarget(F); - if (T == CUDAFunctionTarget::Global || T == CUDAFunctionTarget::Device || - T == CUDAFunctionTarget::HostDevice) + if (T == CUDAFunctionTarget::Global) return ExprError(Diag(E->getBeginLoc(), diag::err_va_arg_in_device)); } } @@ -20108,8 +20107,9 @@ static void DoMarkVarDeclReferenced( bool NeededForConstantEvaluation = isPotentiallyConstantEvaluatedContext(SemaRef) && UsableInConstantExpr; - bool NeedDefinition = - OdrUse == OdrUseContext::Used || NeededForConstantEvaluation; + bool NeedDefinition = OdrUse == OdrUseContext::Used || + NeededForConstantEvaluation || + Var->getType()->isUndeducedType(); assert(!isa<VarTemplatePartialSpecializationDecl>(Var) && "Can't instantiate a partial template specialization."); diff --git a/clang/lib/Sema/SemaOpenACCAtomic.cpp b/clang/lib/Sema/SemaOpenACCAtomic.cpp index a9319dc..ad21129 100644 --- a/clang/lib/Sema/SemaOpenACCAtomic.cpp +++ b/clang/lib/Sema/SemaOpenACCAtomic.cpp @@ -454,9 +454,7 @@ class AtomicOperandChecker { // If nothing matches, error out. DiagnoseInvalidAtomic(BinInf->FoundExpr->getExprLoc(), SemaRef.PDiag(diag::note_acc_atomic_mismatch_operand) - << const_cast<Expr *>(AssignInf.LHS) - << const_cast<Expr *>(BinInf->LHS) - << const_cast<Expr *>(BinInf->RHS)); + << AssignInf.LHS << BinInf->LHS << BinInf->RHS); return IDACInfo::Fail(); } @@ -592,8 +590,7 @@ class AtomicOperandChecker { PartialDiagnostic PD = SemaRef.PDiag(diag::note_acc_atomic_mismatch_compound_operand) - << FirstKind << const_cast<Expr *>(FirstX) << SecondKind - << const_cast<Expr *>(SecondX); + << FirstKind << FirstX << SecondKind << SecondX; return DiagnoseInvalidAtomic(SecondX->getExprLoc(), PD); } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 48e06d1..0fa21e8 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -2490,7 +2490,8 @@ VarDecl *SemaOpenMP::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo, DSAStackTy::DSAVarData DVarTop = DSAStack->getTopDSA(D, DSAStack->isClauseParsingMode()); if (DVarTop.CKind != OMPC_unknown && isOpenMPPrivate(DVarTop.CKind) && - (!VD || VD->hasLocalStorage() || !DVarTop.AppliedToPointee)) + (!VD || VD->hasLocalStorage() || + !(DVarTop.AppliedToPointee && DVarTop.CKind != OMPC_reduction))) return VD ? VD : cast<VarDecl>(DVarTop.PrivateCopy->getDecl()); // Threadprivate variables must not be captured. if (isOpenMPThreadPrivate(DVarTop.CKind)) @@ -4569,6 +4570,7 @@ void SemaOpenMP::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, case OMPD_unroll: case OMPD_reverse: case OMPD_interchange: + case OMPD_fuse: case OMPD_assume: break; default: @@ -6410,6 +6412,10 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective( Res = ActOnOpenMPInterchangeDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc); break; + case OMPD_fuse: + Res = + ActOnOpenMPFuseDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc); + break; case OMPD_for: Res = ActOnOpenMPForDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); @@ -9488,7 +9494,9 @@ static bool checkOpenMPIterationSpace( // sharing attributes. VarsWithImplicitDSA.erase(LCDecl); - assert(isOpenMPLoopDirective(DKind) && "DSA for non-loop vars"); + assert((isOpenMPLoopDirective(DKind) || + isOpenMPCanonicalLoopSequenceTransformationDirective(DKind)) && + "DSA for non-loop vars"); // Check test-expr. HasErrors |= ISC.checkAndSetCond(For ? For->getCond() : CXXFor->getCond()); @@ -9916,7 +9924,8 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr, unsigned NumLoops = std::max(OrderedLoopCount, NestedLoopCount); SmallVector<LoopIterationSpace, 4> IterSpaces(NumLoops); if (!OMPLoopBasedDirective::doForAllLoops( - AStmt->IgnoreContainers(!isOpenMPLoopTransformationDirective(DKind)), + AStmt->IgnoreContainers( + !isOpenMPCanonicalLoopNestTransformationDirective(DKind)), SupportsNonPerfectlyNested, NumLoops, [DKind, &SemaRef, &DSA, NumLoops, NestedLoopCount, CollapseLoopCountExpr, OrderedLoopCountExpr, &VarsWithImplicitDSA, @@ -9938,8 +9947,7 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr, } return false; }, - [&SemaRef, - &Captures](OMPCanonicalLoopNestTransformationDirective *Transform) { + [&SemaRef, &Captures](OMPLoopTransformationDirective *Transform) { Stmt *DependentPreInits = Transform->getPreInits(); if (!DependentPreInits) return; @@ -9954,7 +9962,8 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr, auto *D = cast<VarDecl>(C); DeclRefExpr *Ref = buildDeclRefExpr( SemaRef, D, D->getType().getNonReferenceType(), - Transform->getBeginLoc()); + cast<OMPExecutableDirective>(Transform->getDirective()) + ->getBeginLoc()); Captures[Ref] = Ref; } } @@ -14404,10 +14413,34 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeSimdDirective( getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } +/// Updates OriginalInits by checking Transform against loop transformation +/// directives and appending their pre-inits if a match is found. +static void updatePreInits(OMPLoopTransformationDirective *Transform, + SmallVectorImpl<Stmt *> &PreInits) { + Stmt *Dir = Transform->getDirective(); + switch (Dir->getStmtClass()) { +#define STMT(CLASS, PARENT) +#define ABSTRACT_STMT(CLASS) +#define COMMON_OMP_LOOP_TRANSFORMATION(CLASS, PARENT) \ + case Stmt::CLASS##Class: \ + appendFlattenedStmtList(PreInits, \ + static_cast<const CLASS *>(Dir)->getPreInits()); \ + break; +#define OMPCANONICALLOOPNESTTRANSFORMATIONDIRECTIVE(CLASS, PARENT) \ + COMMON_OMP_LOOP_TRANSFORMATION(CLASS, PARENT) +#define OMPCANONICALLOOPSEQUENCETRANSFORMATIONDIRECTIVE(CLASS, PARENT) \ + COMMON_OMP_LOOP_TRANSFORMATION(CLASS, PARENT) +#include "clang/AST/StmtNodes.inc" +#undef COMMON_OMP_LOOP_TRANSFORMATION + default: + llvm_unreachable("Not a loop transformation"); + } +} + bool SemaOpenMP::checkTransformableLoopNest( OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops, SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers, - Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits) { + Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *>> &OriginalInits) { OriginalInits.emplace_back(); bool Result = OMPLoopBasedDirective::doForAllLoops( AStmt->IgnoreContainers(), /*TryImperfectlyNestedLoops=*/false, NumLoops, @@ -14433,29 +14466,268 @@ bool SemaOpenMP::checkTransformableLoopNest( OriginalInits.emplace_back(); return false; }, - [&OriginalInits](OMPLoopBasedDirective *Transform) { - Stmt *DependentPreInits; - if (auto *Dir = dyn_cast<OMPTileDirective>(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast<OMPStripeDirective>(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast<OMPUnrollDirective>(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast<OMPReverseDirective>(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast<OMPInterchangeDirective>(Transform)) - DependentPreInits = Dir->getPreInits(); - else - llvm_unreachable("Unhandled loop transformation"); - - appendFlattenedStmtList(OriginalInits.back(), DependentPreInits); + [&OriginalInits](OMPLoopTransformationDirective *Transform) { + updatePreInits(Transform, OriginalInits.back()); }); assert(OriginalInits.back().empty() && "No preinit after innermost loop"); OriginalInits.pop_back(); return Result; } -/// Add preinit statements that need to be propageted from the selected loop. +/// Counts the total number of OpenMP canonical nested loops, including the +/// outermost loop (the original loop). PRECONDITION of this visitor is that it +/// must be invoked from the original loop to be analyzed. The traversal stops +/// for Decl's and Expr's given that they may contain inner loops that must not +/// be counted. +/// +/// Example AST structure for the code: +/// +/// int main() { +/// #pragma omp fuse +/// { +/// for (int i = 0; i < 100; i++) { <-- Outer loop +/// []() { +/// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP (1) +/// }; +/// for(int j = 0; j < 5; ++j) {} <-- Inner loop +/// } +/// for (int r = 0; i < 100; i++) { <-- Outer loop +/// struct LocalClass { +/// void bar() { +/// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP (2) +/// } +/// }; +/// for(int k = 0; k < 10; ++k) {} <-- Inner loop +/// {x = 5; for(k = 0; k < 10; ++k) x += k; x}; <-- NOT A LOOP (3) +/// } +/// } +/// } +/// (1) because in a different function (here: a lambda) +/// (2) because in a different function (here: class method) +/// (3) because considered to be intervening-code of non-perfectly nested loop +/// Result: Loop 'i' contains 2 loops, Loop 'r' also contains 2 loops. +class NestedLoopCounterVisitor final : public DynamicRecursiveASTVisitor { +private: + unsigned NestedLoopCount = 0; + +public: + explicit NestedLoopCounterVisitor() = default; + + unsigned getNestedLoopCount() const { return NestedLoopCount; } + + bool VisitForStmt(ForStmt *FS) override { + ++NestedLoopCount; + return true; + } + + bool VisitCXXForRangeStmt(CXXForRangeStmt *FRS) override { + ++NestedLoopCount; + return true; + } + + bool TraverseStmt(Stmt *S) override { + if (!S) + return true; + + // Skip traversal of all expressions, including special cases like + // LambdaExpr, StmtExpr, BlockExpr, and RequiresExpr. These expressions + // may contain inner statements (and even loops), but they are not part + // of the syntactic body of the surrounding loop structure. + // Therefore must not be counted. + if (isa<Expr>(S)) + return true; + + // Only recurse into CompoundStmt (block {}) and loop bodies. + if (isa<CompoundStmt, ForStmt, CXXForRangeStmt>(S)) { + return DynamicRecursiveASTVisitor::TraverseStmt(S); + } + + // Stop traversal of the rest of statements, that break perfect + // loop nesting, such as control flow (IfStmt, SwitchStmt...). + return true; + } + + bool TraverseDecl(Decl *D) override { + // Stop in the case of finding a declaration, it is not important + // in order to find nested loops (Possible CXXRecordDecl, RecordDecl, + // FunctionDecl...). + return true; + } +}; + +bool SemaOpenMP::analyzeLoopSequence(Stmt *LoopSeqStmt, + LoopSequenceAnalysis &SeqAnalysis, + ASTContext &Context, + OpenMPDirectiveKind Kind) { + VarsWithInheritedDSAType TmpDSA; + // Helper Lambda to handle storing initialization and body statements for + // both ForStmt and CXXForRangeStmt. + auto StoreLoopStatements = [](LoopAnalysis &Analysis, Stmt *LoopStmt) { + if (auto *For = dyn_cast<ForStmt>(LoopStmt)) { + Analysis.OriginalInits.push_back(For->getInit()); + Analysis.TheForStmt = For; + } else { + auto *CXXFor = cast<CXXForRangeStmt>(LoopStmt); + Analysis.OriginalInits.push_back(CXXFor->getBeginStmt()); + Analysis.TheForStmt = CXXFor; + } + }; + + // Helper lambda functions to encapsulate the processing of different + // derivations of the canonical loop sequence grammar + // Modularized code for handling loop generation and transformations. + auto AnalyzeLoopGeneration = [&](Stmt *Child) { + auto *LoopTransform = cast<OMPLoopTransformationDirective>(Child); + Stmt *TransformedStmt = LoopTransform->getTransformedStmt(); + unsigned NumGeneratedTopLevelLoops = + LoopTransform->getNumGeneratedTopLevelLoops(); + // Handle the case where transformed statement is not available due to + // dependent contexts + if (!TransformedStmt) { + if (NumGeneratedTopLevelLoops > 0) { + SeqAnalysis.LoopSeqSize += NumGeneratedTopLevelLoops; + return true; + } + // Unroll full (0 loops produced) + Diag(Child->getBeginLoc(), diag::err_omp_not_for) + << 0 << getOpenMPDirectiveName(Kind); + return false; + } + // Handle loop transformations with multiple loop nests + // Unroll full + if (!NumGeneratedTopLevelLoops) { + Diag(Child->getBeginLoc(), diag::err_omp_not_for) + << 0 << getOpenMPDirectiveName(Kind); + return false; + } + // Loop transformatons such as split or loopranged fuse + if (NumGeneratedTopLevelLoops > 1) { + // Get the preinits related to this loop sequence generating + // loop transformation (i.e loopranged fuse, split...) + // These preinits differ slightly from regular inits/pre-inits related + // to single loop generating loop transformations (interchange, unroll) + // given that they are not bounded to a particular loop nest + // so they need to be treated independently + updatePreInits(LoopTransform, SeqAnalysis.LoopSequencePreInits); + return analyzeLoopSequence(TransformedStmt, SeqAnalysis, Context, Kind); + } + // Vast majority: (Tile, Unroll, Stripe, Reverse, Interchange, Fuse all) + // Process the transformed loop statement + LoopAnalysis &NewTransformedSingleLoop = + SeqAnalysis.Loops.emplace_back(Child); + unsigned IsCanonical = checkOpenMPLoop( + Kind, nullptr, nullptr, TransformedStmt, SemaRef, *DSAStack, TmpDSA, + NewTransformedSingleLoop.HelperExprs); + + if (!IsCanonical) + return false; + + StoreLoopStatements(NewTransformedSingleLoop, TransformedStmt); + updatePreInits(LoopTransform, NewTransformedSingleLoop.TransformsPreInits); + + SeqAnalysis.LoopSeqSize++; + return true; + }; + + // Modularized code for handling regular canonical loops. + auto AnalyzeRegularLoop = [&](Stmt *Child) { + LoopAnalysis &NewRegularLoop = SeqAnalysis.Loops.emplace_back(Child); + unsigned IsCanonical = + checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack, + TmpDSA, NewRegularLoop.HelperExprs); + + if (!IsCanonical) + return false; + + StoreLoopStatements(NewRegularLoop, Child); + NestedLoopCounterVisitor NLCV; + NLCV.TraverseStmt(Child); + return true; + }; + + // High level grammar validation. + for (Stmt *Child : LoopSeqStmt->children()) { + if (!Child) + continue; + // Skip over non-loop-sequence statements. + if (!LoopSequenceAnalysis::isLoopSequenceDerivation(Child)) { + Child = Child->IgnoreContainers(); + // Ignore empty compound statement. + if (!Child) + continue; + // In the case of a nested loop sequence ignoring containers would not + // be enough, a recurisve transversal of the loop sequence is required. + if (isa<CompoundStmt>(Child)) { + if (!analyzeLoopSequence(Child, SeqAnalysis, Context, Kind)) + return false; + // Already been treated, skip this children + continue; + } + } + // Regular loop sequence handling. + if (LoopSequenceAnalysis::isLoopSequenceDerivation(Child)) { + if (LoopAnalysis::isLoopTransformation(Child)) { + if (!AnalyzeLoopGeneration(Child)) + return false; + // AnalyzeLoopGeneration updates SeqAnalysis.LoopSeqSize accordingly. + } else { + if (!AnalyzeRegularLoop(Child)) + return false; + SeqAnalysis.LoopSeqSize++; + } + } else { + // Report error for invalid statement inside canonical loop sequence. + Diag(Child->getBeginLoc(), diag::err_omp_not_for) + << 0 << getOpenMPDirectiveName(Kind); + return false; + } + } + return true; +} + +bool SemaOpenMP::checkTransformableLoopSequence( + OpenMPDirectiveKind Kind, Stmt *AStmt, LoopSequenceAnalysis &SeqAnalysis, + ASTContext &Context) { + // Following OpenMP 6.0 API Specification, a Canonical Loop Sequence follows + // the grammar: + // + // canonical-loop-sequence: + // { + // loop-sequence+ + // } + // where loop-sequence can be any of the following: + // 1. canonical-loop-sequence + // 2. loop-nest + // 3. loop-sequence-generating-construct (i.e OMPLoopTransformationDirective) + // + // To recognise and traverse this structure the helper function + // analyzeLoopSequence serves as the recurisve entry point + // and tries to match the input AST to the canonical loop sequence grammar + // structure. This function will perform both a semantic and syntactical + // analysis of the given statement according to OpenMP 6.0 definition of + // the aforementioned canonical loop sequence. + + // We expect an outer compound statement. + if (!isa<CompoundStmt>(AStmt)) { + Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence) + << getOpenMPDirectiveName(Kind); + return false; + } + + // Recursive entry point to process the main loop sequence + if (!analyzeLoopSequence(AStmt, SeqAnalysis, Context, Kind)) + return false; + + // Diagnose an empty loop sequence. + if (!SeqAnalysis.LoopSeqSize) { + Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence) + << getOpenMPDirectiveName(Kind); + return false; + } + return true; +} + +/// Add preinit statements that need to be propagated from the selected loop. static void addLoopPreInits(ASTContext &Context, OMPLoopBasedDirective::HelperExprs &LoopHelper, Stmt *LoopStmt, ArrayRef<Stmt *> OriginalInit, @@ -14540,7 +14812,7 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses, // Verify and diagnose loop nest. SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers(NumLoops); Stmt *Body = nullptr; - SmallVector<SmallVector<Stmt *, 0>, 4> OriginalInits; + SmallVector<SmallVector<Stmt *>, 4> OriginalInits; if (!checkTransformableLoopNest(OMPD_tile, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -14817,7 +15089,7 @@ StmtResult SemaOpenMP::ActOnOpenMPStripeDirective(ArrayRef<OMPClause *> Clauses, // Verify and diagnose loop nest. SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers(NumLoops); Stmt *Body = nullptr; - SmallVector<SmallVector<Stmt *, 0>, 4> OriginalInits; + SmallVector<SmallVector<Stmt *>, 4> OriginalInits; if (!checkTransformableLoopNest(OMPD_stripe, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -15078,7 +15350,7 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef<OMPClause *> Clauses, Stmt *Body = nullptr; SmallVector<OMPLoopBasedDirective::HelperExprs, NumLoops> LoopHelpers( NumLoops); - SmallVector<SmallVector<Stmt *, 0>, NumLoops + 1> OriginalInits; + SmallVector<SmallVector<Stmt *>, NumLoops + 1> OriginalInits; if (!checkTransformableLoopNest(OMPD_unroll, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -15348,7 +15620,7 @@ StmtResult SemaOpenMP::ActOnOpenMPReverseDirective(Stmt *AStmt, Stmt *Body = nullptr; SmallVector<OMPLoopBasedDirective::HelperExprs, NumLoops> LoopHelpers( NumLoops); - SmallVector<SmallVector<Stmt *, 0>, NumLoops + 1> OriginalInits; + SmallVector<SmallVector<Stmt *>, NumLoops + 1> OriginalInits; if (!checkTransformableLoopNest(OMPD_reverse, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -15540,7 +15812,7 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective( // Verify and diagnose loop nest. SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers(NumLoops); Stmt *Body = nullptr; - SmallVector<SmallVector<Stmt *, 0>, 2> OriginalInits; + SmallVector<SmallVector<Stmt *>, 2> OriginalInits; if (!checkTransformableLoopNest(OMPD_interchange, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -15716,6 +15988,484 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective( buildPreInits(Context, PreInits)); } +StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses, + Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc) { + + ASTContext &Context = getASTContext(); + DeclContext *CurrContext = SemaRef.CurContext; + Scope *CurScope = SemaRef.getCurScope(); + CaptureVars CopyTransformer(SemaRef); + + // Ensure the structured block is not empty + if (!AStmt) + return StmtError(); + + // Defer transformation in dependent contexts + // The NumLoopNests argument is set to a placeholder 1 (even though + // using looprange fuse could yield up to 3 top level loop nests) + // because a dependent context could prevent determining its true value + if (CurrContext->isDependentContext()) + return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, + /* NumLoops */ 1, AStmt, nullptr, nullptr); + + // Validate that the potential loop sequence is transformable for fusion + // Also collect the HelperExprs, Loop Stmts, Inits, and Number of loops + LoopSequenceAnalysis SeqAnalysis; + if (!checkTransformableLoopSequence(OMPD_fuse, AStmt, SeqAnalysis, Context)) + return StmtError(); + + // SeqAnalysis.LoopSeqSize exists mostly to handle dependent contexts, + // otherwise it must be the same as SeqAnalysis.Loops.size(). + assert(SeqAnalysis.LoopSeqSize == SeqAnalysis.Loops.size() && + "Inconsistent size of the loop sequence and the number of loops " + "found in the sequence"); + + // Handle clauses, which can be any of the following: [looprange, apply] + const auto *LRC = + OMPExecutableDirective::getSingleClause<OMPLoopRangeClause>(Clauses); + + // The clause arguments are invalidated if any error arises + // such as non-constant or non-positive arguments + if (LRC && (!LRC->getFirst() || !LRC->getCount())) + return StmtError(); + + // Delayed semantic check of LoopRange constraint + // Evaluates the loop range arguments and returns the first and count values + auto EvaluateLoopRangeArguments = [&Context](Expr *First, Expr *Count, + uint64_t &FirstVal, + uint64_t &CountVal) { + llvm::APSInt FirstInt = First->EvaluateKnownConstInt(Context); + llvm::APSInt CountInt = Count->EvaluateKnownConstInt(Context); + FirstVal = FirstInt.getZExtValue(); + CountVal = CountInt.getZExtValue(); + }; + + // OpenMP [6.0, Restrictions] + // first + count - 1 must not evaluate to a value greater than the + // loop sequence length of the associated canonical loop sequence. + auto ValidLoopRange = [](uint64_t FirstVal, uint64_t CountVal, + unsigned NumLoops) -> bool { + return FirstVal + CountVal - 1 <= NumLoops; + }; + uint64_t FirstVal = 1, CountVal = 0, LastVal = SeqAnalysis.LoopSeqSize; + + // Validates the loop range after evaluating the semantic information + // and ensures that the range is valid for the given loop sequence size. + // Expressions are evaluated at compile time to obtain constant values. + if (LRC) { + EvaluateLoopRangeArguments(LRC->getFirst(), LRC->getCount(), FirstVal, + CountVal); + if (CountVal == 1) + SemaRef.Diag(LRC->getCountLoc(), diag::warn_omp_redundant_fusion) + << getOpenMPDirectiveName(OMPD_fuse); + + if (!ValidLoopRange(FirstVal, CountVal, SeqAnalysis.LoopSeqSize)) { + SemaRef.Diag(LRC->getFirstLoc(), diag::err_omp_invalid_looprange) + << getOpenMPDirectiveName(OMPD_fuse) << FirstVal + << (FirstVal + CountVal - 1) << SeqAnalysis.LoopSeqSize; + return StmtError(); + } + + LastVal = FirstVal + CountVal - 1; + } + + // Complete fusion generates a single canonical loop nest + // However looprange clause may generate several loop nests + unsigned NumGeneratedTopLevelLoops = + LRC ? SeqAnalysis.LoopSeqSize - CountVal + 1 : 1; + + // Emit a warning for redundant loop fusion when the sequence contains only + // one loop. + if (SeqAnalysis.LoopSeqSize == 1) + SemaRef.Diag(AStmt->getBeginLoc(), diag::warn_omp_redundant_fusion) + << getOpenMPDirectiveName(OMPD_fuse); + + // Select the type with the largest bit width among all induction variables + QualType IVType = + SeqAnalysis.Loops[FirstVal - 1].HelperExprs.IterationVarRef->getType(); + for (unsigned I : llvm::seq<unsigned>(FirstVal, LastVal)) { + QualType CurrentIVType = + SeqAnalysis.Loops[I].HelperExprs.IterationVarRef->getType(); + if (Context.getTypeSize(CurrentIVType) > Context.getTypeSize(IVType)) { + IVType = CurrentIVType; + } + } + uint64_t IVBitWidth = Context.getIntWidth(IVType); + + // Create pre-init declarations for all loops lower bounds, upper bounds, + // strides and num-iterations for every top level loop in the fusion + SmallVector<VarDecl *, 4> LBVarDecls; + SmallVector<VarDecl *, 4> STVarDecls; + SmallVector<VarDecl *, 4> NIVarDecls; + SmallVector<VarDecl *, 4> UBVarDecls; + SmallVector<VarDecl *, 4> IVVarDecls; + + // Helper lambda to create variables for bounds, strides, and other + // expressions. Generates both the variable declaration and the corresponding + // initialization statement. + auto CreateHelperVarAndStmt = + [&, &SemaRef = SemaRef](Expr *ExprToCopy, const std::string &BaseName, + unsigned I, bool NeedsNewVD = false) { + Expr *TransformedExpr = + AssertSuccess(CopyTransformer.TransformExpr(ExprToCopy)); + if (!TransformedExpr) + return std::pair<VarDecl *, StmtResult>(nullptr, StmtError()); + + auto Name = (Twine(".omp.") + BaseName + std::to_string(I)).str(); + + VarDecl *VD; + if (NeedsNewVD) { + VD = buildVarDecl(SemaRef, SourceLocation(), IVType, Name); + SemaRef.AddInitializerToDecl(VD, TransformedExpr, false); + } else { + // Create a unique variable name + DeclRefExpr *DRE = cast<DeclRefExpr>(TransformedExpr); + VD = cast<VarDecl>(DRE->getDecl()); + VD->setDeclName(&SemaRef.PP.getIdentifierTable().get(Name)); + } + // Create the corresponding declaration statement + StmtResult DeclStmt = new (Context) class DeclStmt( + DeclGroupRef(VD), SourceLocation(), SourceLocation()); + return std::make_pair(VD, DeclStmt); + }; + + // PreInits hold a sequence of variable declarations that must be executed + // before the fused loop begins. These include bounds, strides, and other + // helper variables required for the transformation. Other loop transforms + // also contain their own preinits + SmallVector<Stmt *> PreInits; + + // Update the general preinits using the preinits generated by loop sequence + // generating loop transformations. These preinits differ slightly from + // single-loop transformation preinits, as they can be detached from a + // specific loop inside multiple generated loop nests. This happens + // because certain helper variables, like '.omp.fuse.max', are introduced to + // handle fused iteration spaces and may not be directly tied to a single + // original loop. The preinit structure must ensure that hidden variables + // like '.omp.fuse.max' are still properly handled. + // Transformations that apply this concept: Loopranged Fuse, Split + llvm::append_range(PreInits, SeqAnalysis.LoopSequencePreInits); + + // Process each single loop to generate and collect declarations + // and statements for all helper expressions related to + // particular single loop nests + + // Also In the case of the fused loops, we keep track of their original + // inits by appending them to their preinits statement, and in the case of + // transformations, also append their preinits (which contain the original + // loop initialization statement or other statements) + + // Firstly we need to set TransformIndex to match the begining of the + // looprange section + unsigned int TransformIndex = 0; + for (unsigned I : llvm::seq<unsigned>(FirstVal - 1)) { + if (SeqAnalysis.Loops[I].isLoopTransformation()) + ++TransformIndex; + } + + for (unsigned int I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) { + if (SeqAnalysis.Loops[I].isRegularLoop()) { + addLoopPreInits(Context, SeqAnalysis.Loops[I].HelperExprs, + SeqAnalysis.Loops[I].TheForStmt, + SeqAnalysis.Loops[I].OriginalInits, PreInits); + } else if (SeqAnalysis.Loops[I].isLoopTransformation()) { + // For transformed loops, insert both pre-inits and original inits. + // Order matters: pre-inits may define variables used in the original + // inits such as upper bounds... + SmallVector<Stmt *> &TransformPreInit = + SeqAnalysis.Loops[TransformIndex++].TransformsPreInits; + llvm::append_range(PreInits, TransformPreInit); + + addLoopPreInits(Context, SeqAnalysis.Loops[I].HelperExprs, + SeqAnalysis.Loops[I].TheForStmt, + SeqAnalysis.Loops[I].OriginalInits, PreInits); + } + auto [UBVD, UBDStmt] = + CreateHelperVarAndStmt(SeqAnalysis.Loops[I].HelperExprs.UB, "ub", J); + auto [LBVD, LBDStmt] = + CreateHelperVarAndStmt(SeqAnalysis.Loops[I].HelperExprs.LB, "lb", J); + auto [STVD, STDStmt] = + CreateHelperVarAndStmt(SeqAnalysis.Loops[I].HelperExprs.ST, "st", J); + auto [NIVD, NIDStmt] = CreateHelperVarAndStmt( + SeqAnalysis.Loops[I].HelperExprs.NumIterations, "ni", J, true); + auto [IVVD, IVDStmt] = CreateHelperVarAndStmt( + SeqAnalysis.Loops[I].HelperExprs.IterationVarRef, "iv", J); + + assert(LBVD && STVD && NIVD && IVVD && + "OpenMP Fuse Helper variables creation failed"); + + UBVarDecls.push_back(UBVD); + LBVarDecls.push_back(LBVD); + STVarDecls.push_back(STVD); + NIVarDecls.push_back(NIVD); + IVVarDecls.push_back(IVVD); + + PreInits.push_back(LBDStmt.get()); + PreInits.push_back(STDStmt.get()); + PreInits.push_back(NIDStmt.get()); + PreInits.push_back(IVDStmt.get()); + } + + auto MakeVarDeclRef = [&SemaRef = this->SemaRef](VarDecl *VD) { + return buildDeclRefExpr(SemaRef, VD, VD->getType(), VD->getLocation(), + false); + }; + + // Following up the creation of the final fused loop will be performed + // which has the following shape (considering the selected loops): + // + // for (fuse.index = 0; fuse.index < max(ni0, ni1..., nik); ++fuse.index) { + // if (fuse.index < ni0){ + // iv0 = lb0 + st0 * fuse.index; + // original.index0 = iv0 + // body(0); + // } + // if (fuse.index < ni1){ + // iv1 = lb1 + st1 * fuse.index; + // original.index1 = iv1 + // body(1); + // } + // + // ... + // + // if (fuse.index < nik){ + // ivk = lbk + stk * fuse.index; + // original.indexk = ivk + // body(k); Expr *InitVal = IntegerLiteral::Create(Context, + // llvm::APInt(IVWidth, 0), + // } + + // 1. Create the initialized fuse index + StringRef IndexName = ".omp.fuse.index"; + Expr *InitVal = IntegerLiteral::Create(Context, llvm::APInt(IVBitWidth, 0), + IVType, SourceLocation()); + VarDecl *IndexDecl = + buildVarDecl(SemaRef, {}, IVType, IndexName, nullptr, nullptr); + SemaRef.AddInitializerToDecl(IndexDecl, InitVal, false); + StmtResult InitStmt = new (Context) + DeclStmt(DeclGroupRef(IndexDecl), SourceLocation(), SourceLocation()); + + if (!InitStmt.isUsable()) + return StmtError(); + + auto MakeIVRef = [&SemaRef = this->SemaRef, IndexDecl, IVType, + Loc = InitVal->getExprLoc()]() { + return buildDeclRefExpr(SemaRef, IndexDecl, IVType, Loc, false); + }; + + // 2. Iteratively compute the max number of logical iterations Max(NI_1, NI_2, + // ..., NI_k) + // + // This loop accumulates the maximum value across multiple expressions, + // ensuring each step constructs a unique AST node for correctness. By using + // intermediate temporary variables and conditional operators, we maintain + // distinct nodes and avoid duplicating subtrees, For instance, max(a,b,c): + // omp.temp0 = max(a, b) + // omp.temp1 = max(omp.temp0, c) + // omp.fuse.max = max(omp.temp1, omp.temp0) + + ExprResult MaxExpr; + // I is the range of loops in the sequence that we fuse. + for (unsigned I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) { + DeclRefExpr *NIRef = MakeVarDeclRef(NIVarDecls[J]); + QualType NITy = NIRef->getType(); + + if (MaxExpr.isUnset()) { + // Initialize MaxExpr with the first NI expression + MaxExpr = NIRef; + } else { + // Create a new acummulator variable t_i = MaxExpr + std::string TempName = (Twine(".omp.temp.") + Twine(J)).str(); + VarDecl *TempDecl = + buildVarDecl(SemaRef, {}, NITy, TempName, nullptr, nullptr); + TempDecl->setInit(MaxExpr.get()); + DeclRefExpr *TempRef = + buildDeclRefExpr(SemaRef, TempDecl, NITy, SourceLocation(), false); + DeclRefExpr *TempRef2 = + buildDeclRefExpr(SemaRef, TempDecl, NITy, SourceLocation(), false); + // Add a DeclStmt to PreInits to ensure the variable is declared. + StmtResult TempStmt = new (Context) + DeclStmt(DeclGroupRef(TempDecl), SourceLocation(), SourceLocation()); + + if (!TempStmt.isUsable()) + return StmtError(); + PreInits.push_back(TempStmt.get()); + + // Build MaxExpr <-(MaxExpr > NIRef ? MaxExpr : NIRef) + ExprResult Comparison = + SemaRef.BuildBinOp(nullptr, SourceLocation(), BO_GT, TempRef, NIRef); + // Handle any errors in Comparison creation + if (!Comparison.isUsable()) + return StmtError(); + + DeclRefExpr *NIRef2 = MakeVarDeclRef(NIVarDecls[J]); + // Update MaxExpr using a conditional expression to hold the max value + MaxExpr = new (Context) ConditionalOperator( + Comparison.get(), SourceLocation(), TempRef2, SourceLocation(), + NIRef2->getExprStmt(), NITy, VK_LValue, OK_Ordinary); + + if (!MaxExpr.isUsable()) + return StmtError(); + } + } + if (!MaxExpr.isUsable()) + return StmtError(); + + // 3. Declare the max variable + const std::string MaxName = Twine(".omp.fuse.max").str(); + VarDecl *MaxDecl = + buildVarDecl(SemaRef, {}, IVType, MaxName, nullptr, nullptr); + MaxDecl->setInit(MaxExpr.get()); + DeclRefExpr *MaxRef = buildDeclRefExpr(SemaRef, MaxDecl, IVType, {}, false); + StmtResult MaxStmt = new (Context) + DeclStmt(DeclGroupRef(MaxDecl), SourceLocation(), SourceLocation()); + + if (MaxStmt.isInvalid()) + return StmtError(); + PreInits.push_back(MaxStmt.get()); + + // 4. Create condition Expr: index < n_max + ExprResult CondExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT, + MakeIVRef(), MaxRef); + if (!CondExpr.isUsable()) + return StmtError(); + + // 5. Increment Expr: ++index + ExprResult IncrExpr = + SemaRef.BuildUnaryOp(CurScope, SourceLocation(), UO_PreInc, MakeIVRef()); + if (!IncrExpr.isUsable()) + return StmtError(); + + // 6. Build the Fused Loop Body + // The final fused loop iterates over the maximum logical range. Inside the + // loop, each original loop's index is calculated dynamically, and its body + // is executed conditionally. + // + // Each sub-loop's body is guarded by a conditional statement to ensure + // it executes only within its logical iteration range: + // + // if (fuse.index < ni_k){ + // iv_k = lb_k + st_k * fuse.index; + // original.index = iv_k + // body(k); + // } + + CompoundStmt *FusedBody = nullptr; + SmallVector<Stmt *, 4> FusedBodyStmts; + for (unsigned I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) { + // Assingment of the original sub-loop index to compute the logical index + // IV_k = LB_k + omp.fuse.index * ST_k + ExprResult IdxExpr = + SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Mul, + MakeVarDeclRef(STVarDecls[J]), MakeIVRef()); + if (!IdxExpr.isUsable()) + return StmtError(); + IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Add, + MakeVarDeclRef(LBVarDecls[J]), IdxExpr.get()); + + if (!IdxExpr.isUsable()) + return StmtError(); + IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Assign, + MakeVarDeclRef(IVVarDecls[J]), IdxExpr.get()); + if (!IdxExpr.isUsable()) + return StmtError(); + + // Update the original i_k = IV_k + SmallVector<Stmt *, 4> BodyStmts; + BodyStmts.push_back(IdxExpr.get()); + llvm::append_range(BodyStmts, SeqAnalysis.Loops[I].HelperExprs.Updates); + + // If the loop is a CXXForRangeStmt then the iterator variable is needed + if (auto *SourceCXXFor = + dyn_cast<CXXForRangeStmt>(SeqAnalysis.Loops[I].TheForStmt)) + BodyStmts.push_back(SourceCXXFor->getLoopVarStmt()); + + Stmt *Body = + (isa<ForStmt>(SeqAnalysis.Loops[I].TheForStmt)) + ? cast<ForStmt>(SeqAnalysis.Loops[I].TheForStmt)->getBody() + : cast<CXXForRangeStmt>(SeqAnalysis.Loops[I].TheForStmt)->getBody(); + BodyStmts.push_back(Body); + + CompoundStmt *CombinedBody = + CompoundStmt::Create(Context, BodyStmts, FPOptionsOverride(), + SourceLocation(), SourceLocation()); + ExprResult Condition = + SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT, MakeIVRef(), + MakeVarDeclRef(NIVarDecls[J])); + + if (!Condition.isUsable()) + return StmtError(); + + IfStmt *IfStatement = IfStmt::Create( + Context, SourceLocation(), IfStatementKind::Ordinary, nullptr, nullptr, + Condition.get(), SourceLocation(), SourceLocation(), CombinedBody, + SourceLocation(), nullptr); + + FusedBodyStmts.push_back(IfStatement); + } + FusedBody = CompoundStmt::Create(Context, FusedBodyStmts, FPOptionsOverride(), + SourceLocation(), SourceLocation()); + + // 7. Construct the final fused loop + ForStmt *FusedForStmt = new (Context) + ForStmt(Context, InitStmt.get(), CondExpr.get(), nullptr, IncrExpr.get(), + FusedBody, InitStmt.get()->getBeginLoc(), SourceLocation(), + IncrExpr.get()->getEndLoc()); + + // In the case of looprange, the result of fuse won't simply + // be a single loop (ForStmt), but rather a loop sequence + // (CompoundStmt) of 3 parts: the pre-fusion loops, the fused loop + // and the post-fusion loops, preserving its original order. + // + // Note: If looprange clause produces a single fused loop nest then + // this compound statement wrapper is unnecessary (Therefore this + // treatment is skipped) + + Stmt *FusionStmt = FusedForStmt; + if (LRC && CountVal != SeqAnalysis.LoopSeqSize) { + SmallVector<Stmt *, 4> FinalLoops; + + // Reset the transform index + TransformIndex = 0; + + // Collect all non-fused loops before and after the fused region. + // Pre-fusion and post-fusion loops are inserted in order exploiting their + // symmetry, along with their corresponding transformation pre-inits if + // needed. The fused loop is added between the two regions. + for (unsigned I : llvm::seq<unsigned>(SeqAnalysis.LoopSeqSize)) { + if (I >= FirstVal - 1 && I < FirstVal + CountVal - 1) { + // Update the Transformation counter to skip already treated + // loop transformations + if (!SeqAnalysis.Loops[I].isLoopTransformation()) + ++TransformIndex; + continue; + } + + // No need to handle: + // Regular loops: they are kept intact as-is. + // Loop-sequence-generating transformations: already handled earlier. + // Only TransformSingleLoop requires inserting pre-inits here + if (SeqAnalysis.Loops[I].isRegularLoop()) { + const auto &TransformPreInit = + SeqAnalysis.Loops[TransformIndex++].TransformsPreInits; + if (!TransformPreInit.empty()) + llvm::append_range(PreInits, TransformPreInit); + } + + FinalLoops.push_back(SeqAnalysis.Loops[I].TheForStmt); + } + + FinalLoops.insert(FinalLoops.begin() + (FirstVal - 1), FusedForStmt); + FusionStmt = CompoundStmt::Create(Context, FinalLoops, FPOptionsOverride(), + SourceLocation(), SourceLocation()); + } + return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, + NumGeneratedTopLevelLoops, AStmt, FusionStmt, + buildPreInits(Context, PreInits)); +} + OMPClause *SemaOpenMP::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, SourceLocation StartLoc, @@ -16887,6 +17637,31 @@ OMPClause *SemaOpenMP::ActOnOpenMPPartialClause(Expr *FactorExpr, FactorExpr); } +OMPClause *SemaOpenMP::ActOnOpenMPLoopRangeClause( + Expr *First, Expr *Count, SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation FirstLoc, SourceLocation CountLoc, SourceLocation EndLoc) { + + // OpenMP [6.0, Restrictions] + // First and Count must be integer expressions with positive value + ExprResult FirstVal = + VerifyPositiveIntegerConstantInClause(First, OMPC_looprange); + if (FirstVal.isInvalid()) + First = nullptr; + + ExprResult CountVal = + VerifyPositiveIntegerConstantInClause(Count, OMPC_looprange); + if (CountVal.isInvalid()) + Count = nullptr; + + // OpenMP [6.0, Restrictions] + // first + count - 1 must not evaluate to a value greater than the + // loop sequence length of the associated canonical loop sequence. + // This check must be performed afterwards due to the delayed + // parsing and computation of the associated loop sequence + return OMPLoopRangeClause::Create(getASTContext(), StartLoc, LParenLoc, + FirstLoc, CountLoc, EndLoc, First, Count); +} + OMPClause *SemaOpenMP::ActOnOpenMPAlignClause(Expr *A, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 3ebbb30..2bf1511 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -7102,7 +7102,7 @@ ExprResult Sema::CheckTemplateArgument(NamedDecl *Param, QualType ParamType, // If the parameter type somehow involves auto, deduce the type now. DeducedType *DeducedT = ParamType->getContainedDeducedType(); - bool IsDeduced = DeducedT && !DeducedT->isDeduced(); + bool IsDeduced = DeducedT && DeducedT->getDeducedType().isNull(); if (IsDeduced) { // When checking a deduced template argument, deduce from its type even if // the type is dependent, in order to check the types of non-type template diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 1ff94d7..f1c9c5c 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -616,29 +616,30 @@ Sema::InstantiatingTemplate::InstantiatingTemplate( Invalid = true; return; } - Invalid = CheckInstantiationDepth(PointOfInstantiation, InstantiationRange); + + CodeSynthesisContext Inst; + Inst.Kind = Kind; + Inst.PointOfInstantiation = PointOfInstantiation; + Inst.Entity = Entity; + Inst.Template = Template; + Inst.TemplateArgs = TemplateArgs.data(); + Inst.NumTemplateArgs = TemplateArgs.size(); + Inst.DeductionInfo = DeductionInfo; + Inst.InstantiationRange = InstantiationRange; + Inst.InConstraintSubstitution = + Inst.Kind == CodeSynthesisContext::ConstraintSubstitution; + if (!SemaRef.CodeSynthesisContexts.empty()) + Inst.InConstraintSubstitution |= + SemaRef.CodeSynthesisContexts.back().InConstraintSubstitution; + + Invalid = SemaRef.pushCodeSynthesisContext(Inst); if (!Invalid) { - CodeSynthesisContext Inst; - Inst.Kind = Kind; - Inst.PointOfInstantiation = PointOfInstantiation; - Inst.Entity = Entity; - Inst.Template = Template; - Inst.TemplateArgs = TemplateArgs.data(); - Inst.NumTemplateArgs = TemplateArgs.size(); - Inst.DeductionInfo = DeductionInfo; - Inst.InstantiationRange = InstantiationRange; - Inst.InConstraintSubstitution = - Inst.Kind == CodeSynthesisContext::ConstraintSubstitution; - if (!SemaRef.CodeSynthesisContexts.empty()) - Inst.InConstraintSubstitution |= - SemaRef.CodeSynthesisContexts.back().InConstraintSubstitution; - - SemaRef.pushCodeSynthesisContext(Inst); - - AlreadyInstantiating = !Inst.Entity ? false : - !SemaRef.InstantiatingSpecializations - .insert({Inst.Entity->getCanonicalDecl(), Inst.Kind}) - .second; + AlreadyInstantiating = + !Inst.Entity + ? false + : !SemaRef.InstantiatingSpecializations + .insert({Inst.Entity->getCanonicalDecl(), Inst.Kind}) + .second; atTemplateBegin(SemaRef.TemplateInstCallbacks, SemaRef, Inst); } } @@ -834,18 +835,34 @@ Sema::InstantiatingTemplate::InstantiatingTemplate( : InstantiatingTemplate(SemaRef, CodeSynthesisContext::PartialOrderingTTP, ArgLoc, InstantiationRange, PArg) {} -void Sema::pushCodeSynthesisContext(CodeSynthesisContext Ctx) { +bool Sema::pushCodeSynthesisContext(CodeSynthesisContext Ctx) { Ctx.SavedInNonInstantiationSFINAEContext = InNonInstantiationSFINAEContext; InNonInstantiationSFINAEContext = false; - CodeSynthesisContexts.push_back(Ctx); - - if (!Ctx.isInstantiationRecord()) + if (!Ctx.isInstantiationRecord()) { ++NonInstantiationEntries; + } else { + assert(SemaRef.NonInstantiationEntries <= + SemaRef.CodeSynthesisContexts.size()); + if ((SemaRef.CodeSynthesisContexts.size() - + SemaRef.NonInstantiationEntries) > + SemaRef.getLangOpts().InstantiationDepth) { + SemaRef.Diag(Ctx.PointOfInstantiation, + diag::err_template_recursion_depth_exceeded) + << SemaRef.getLangOpts().InstantiationDepth << Ctx.InstantiationRange; + SemaRef.Diag(Ctx.PointOfInstantiation, + diag::note_template_recursion_depth) + << SemaRef.getLangOpts().InstantiationDepth; + return true; + } + } + + CodeSynthesisContexts.push_back(Ctx); // Check to see if we're low on stack space. We can't do anything about this // from here, but we can at least warn the user. StackHandler.warnOnStackNearlyExhausted(Ctx.PointOfInstantiation); + return false; } void Sema::popCodeSynthesisContext() { @@ -907,25 +924,6 @@ static std::string convertCallArgsToString(Sema &S, return Result; } -bool Sema::InstantiatingTemplate::CheckInstantiationDepth( - SourceLocation PointOfInstantiation, - SourceRange InstantiationRange) { - assert(SemaRef.NonInstantiationEntries <= - SemaRef.CodeSynthesisContexts.size()); - if ((SemaRef.CodeSynthesisContexts.size() - - SemaRef.NonInstantiationEntries) - <= SemaRef.getLangOpts().InstantiationDepth) - return false; - - SemaRef.Diag(PointOfInstantiation, - diag::err_template_recursion_depth_exceeded) - << SemaRef.getLangOpts().InstantiationDepth - << InstantiationRange; - SemaRef.Diag(PointOfInstantiation, diag::note_template_recursion_depth) - << SemaRef.getLangOpts().InstantiationDepth; - return true; -} - void Sema::PrintInstantiationStack(InstantiationContextDiagFuncRef DiagFunc) { // Determine which template instantiations to skip, if any. unsigned SkipStart = CodeSynthesisContexts.size(), SkipEnd = SkipStart; diff --git a/clang/lib/Sema/SemaTypeTraits.cpp b/clang/lib/Sema/SemaTypeTraits.cpp index c2427dcf..3e34675 100644 --- a/clang/lib/Sema/SemaTypeTraits.cpp +++ b/clang/lib/Sema/SemaTypeTraits.cpp @@ -1163,13 +1163,16 @@ static bool EvaluateUnaryTypeTrait(Sema &Self, TypeTrait UTT, // - it has at least one trivial eligible constructor and a trivial, // non-deleted destructor. const CXXDestructorDecl *Dtor = RD->getDestructor(); - if (UnqualT->isAggregateType()) - if (Dtor && !Dtor->isUserProvided()) - return true; - if (RD->hasTrivialDestructor() && (!Dtor || !Dtor->isDeleted())) - if (RD->hasTrivialDefaultConstructor() || - RD->hasTrivialCopyConstructor() || RD->hasTrivialMoveConstructor()) - return true; + if (UnqualT->isAggregateType() && (!Dtor || !Dtor->isUserProvided())) + return true; + if (RD->hasTrivialDestructor() && (!Dtor || !Dtor->isDeleted())) { + for (CXXConstructorDecl *Ctr : RD->ctors()) { + if (Ctr->isIneligibleOrNotSelected() || Ctr->isDeleted()) + continue; + if (Ctr->isTrivial()) + return true; + } + } return false; } case UTT_IsIntangibleType: @@ -1827,10 +1830,10 @@ static bool EvaluateBinaryTypeTrait(Sema &Self, TypeTrait BTT, return Self.HLSL().IsScalarizedLayoutCompatible(LhsT, RhsT); } - case BTT_LtSynthesisesFromSpaceship: - case BTT_LeSynthesisesFromSpaceship: - case BTT_GtSynthesisesFromSpaceship: - case BTT_GeSynthesisesFromSpaceship: { + case BTT_LtSynthesizesFromSpaceship: + case BTT_LeSynthesizesFromSpaceship: + case BTT_GtSynthesizesFromSpaceship: + case BTT_GeSynthesizesFromSpaceship: { EnterExpressionEvaluationContext UnevaluatedContext( Self, Sema::ExpressionEvaluationContext::Unevaluated); Sema::SFINAETrap SFINAE(Self, /*ForValidityCheck=*/true); @@ -1849,13 +1852,13 @@ static bool EvaluateBinaryTypeTrait(Sema &Self, TypeTrait BTT, auto OpKind = [&] { switch (BTT) { - case BTT_LtSynthesisesFromSpaceship: + case BTT_LtSynthesizesFromSpaceship: return BinaryOperatorKind::BO_LT; - case BTT_LeSynthesisesFromSpaceship: + case BTT_LeSynthesizesFromSpaceship: return BinaryOperatorKind::BO_LE; - case BTT_GtSynthesisesFromSpaceship: + case BTT_GtSynthesizesFromSpaceship: return BinaryOperatorKind::BO_GT; - case BTT_GeSynthesisesFromSpaceship: + case BTT_GeSynthesizesFromSpaceship: return BinaryOperatorKind::BO_GE; default: llvm_unreachable("Trying to Synthesize non-comparison operator?"); diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 0214078..6967301 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -1783,6 +1783,14 @@ public: LParenLoc, EndLoc); } + OMPClause * + RebuildOMPLoopRangeClause(Expr *First, Expr *Count, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation FirstLoc, + SourceLocation CountLoc, SourceLocation EndLoc) { + return getSema().OpenMP().ActOnOpenMPLoopRangeClause( + First, Count, StartLoc, LParenLoc, FirstLoc, CountLoc, EndLoc); + } + /// Build a new OpenMP 'allocator' clause. /// /// By default, performs semantic analysis to build the new OpenMP clause. @@ -9609,6 +9617,17 @@ StmtResult TreeTransform<Derived>::TransformOMPInterchangeDirective( template <typename Derived> StmtResult +TreeTransform<Derived>::TransformOMPFuseDirective(OMPFuseDirective *D) { + DeclarationNameInfo DirName; + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + D->getDirectiveKind(), DirName, nullptr, D->getBeginLoc()); + StmtResult Res = getDerived().TransformOMPExecutableDirective(D); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); + return Res; +} + +template <typename Derived> +StmtResult TreeTransform<Derived>::TransformOMPForDirective(OMPForDirective *D) { DeclarationNameInfo DirName; getDerived().getSema().OpenMP().StartOpenMPDSABlock( @@ -10502,6 +10521,31 @@ TreeTransform<Derived>::TransformOMPPartialClause(OMPPartialClause *C) { template <typename Derived> OMPClause * +TreeTransform<Derived>::TransformOMPLoopRangeClause(OMPLoopRangeClause *C) { + ExprResult F = getDerived().TransformExpr(C->getFirst()); + if (F.isInvalid()) + return nullptr; + + ExprResult Cn = getDerived().TransformExpr(C->getCount()); + if (Cn.isInvalid()) + return nullptr; + + Expr *First = F.get(); + Expr *Count = Cn.get(); + + bool Changed = (First != C->getFirst()) || (Count != C->getCount()); + + // If no changes and AlwaysRebuild() is false, return the original clause + if (!Changed && !getDerived().AlwaysRebuild()) + return C; + + return RebuildOMPLoopRangeClause(First, Count, C->getBeginLoc(), + C->getLParenLoc(), C->getFirstLoc(), + C->getCountLoc(), C->getEndLoc()); +} + +template <typename Derived> +OMPClause * TreeTransform<Derived>::TransformOMPCollapseClause(OMPCollapseClause *C) { ExprResult E = getDerived().TransformExpr(C->getNumForLoops()); if (E.isInvalid()) diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 9ee8a0f..c05e428 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11215,6 +11215,9 @@ OMPClause *OMPClauseReader::readClause() { case llvm::omp::OMPC_partial: C = OMPPartialClause::CreateEmpty(Context); break; + case llvm::omp::OMPC_looprange: + C = OMPLoopRangeClause::CreateEmpty(Context); + break; case llvm::omp::OMPC_allocator: C = new (Context) OMPAllocatorClause(); break; @@ -11618,6 +11621,14 @@ void OMPClauseReader::VisitOMPPartialClause(OMPPartialClause *C) { C->setLParenLoc(Record.readSourceLocation()); } +void OMPClauseReader::VisitOMPLoopRangeClause(OMPLoopRangeClause *C) { + C->setFirst(Record.readSubExpr()); + C->setCount(Record.readSubExpr()); + C->setLParenLoc(Record.readSourceLocation()); + C->setFirstLoc(Record.readSourceLocation()); + C->setCountLoc(Record.readSourceLocation()); +} + void OMPClauseReader::VisitOMPAllocatorClause(OMPAllocatorClause *C) { C->setAllocator(Record.readExpr()); C->setLParenLoc(Record.readSourceLocation()); diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 213c2c2..70b898a 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2469,10 +2469,21 @@ void ASTStmtReader::VisitOMPReverseDirective(OMPReverseDirective *D) { VisitOMPCanonicalLoopNestTransformationDirective(D); } +void ASTStmtReader::VisitOMPCanonicalLoopSequenceTransformationDirective( + OMPCanonicalLoopSequenceTransformationDirective *D) { + VisitStmt(D); + VisitOMPExecutableDirective(D); + D->setNumGeneratedTopLevelLoops(Record.readUInt32()); +} + void ASTStmtReader::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) { VisitOMPCanonicalLoopNestTransformationDirective(D); } +void ASTStmtReader::VisitOMPFuseDirective(OMPFuseDirective *D) { + VisitOMPCanonicalLoopSequenceTransformationDirective(D); +} + void ASTStmtReader::VisitOMPForDirective(OMPForDirective *D) { VisitOMPLoopDirective(D); D->setHasCancel(Record.readBool()); @@ -3615,6 +3626,12 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { break; } + case STMT_OMP_FUSE_DIRECTIVE: { + unsigned NumClauses = Record[ASTStmtReader::NumStmtFields]; + S = OMPFuseDirective::CreateEmpty(Context, NumClauses); + break; + } + case STMT_OMP_INTERCHANGE_DIRECTIVE: { unsigned NumLoops = Record[ASTStmtReader::NumStmtFields]; unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1]; diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 09859da..cdf95ba 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -7882,6 +7882,14 @@ void OMPClauseWriter::VisitOMPPartialClause(OMPPartialClause *C) { Record.AddSourceLocation(C->getLParenLoc()); } +void OMPClauseWriter::VisitOMPLoopRangeClause(OMPLoopRangeClause *C) { + Record.AddStmt(C->getFirst()); + Record.AddStmt(C->getCount()); + Record.AddSourceLocation(C->getLParenLoc()); + Record.AddSourceLocation(C->getFirstLoc()); + Record.AddSourceLocation(C->getCountLoc()); +} + void OMPClauseWriter::VisitOMPAllocatorClause(OMPAllocatorClause *C) { Record.AddStmt(C->getAllocator()); Record.AddSourceLocation(C->getLParenLoc()); diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index 21c04dd..ebda91e 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2487,6 +2487,18 @@ void ASTStmtWriter::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) { Code = serialization::STMT_OMP_INTERCHANGE_DIRECTIVE; } +void ASTStmtWriter::VisitOMPCanonicalLoopSequenceTransformationDirective( + OMPCanonicalLoopSequenceTransformationDirective *D) { + VisitStmt(D); + VisitOMPExecutableDirective(D); + Record.writeUInt32(D->getNumGeneratedTopLevelLoops()); +} + +void ASTStmtWriter::VisitOMPFuseDirective(OMPFuseDirective *D) { + VisitOMPCanonicalLoopSequenceTransformationDirective(D); + Code = serialization::STMT_OMP_FUSE_DIRECTIVE; +} + void ASTStmtWriter::VisitOMPForDirective(OMPForDirective *D) { VisitOMPLoopDirective(D); Record.writeBool(D->hasCancel()); diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp index 36f316d..0ae784c 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -672,6 +672,10 @@ ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C, ProgramStateRef stateTrue, stateFalse; + if (!First.Expression->getType()->isAnyPointerType() || + !Second.Expression->getType()->isAnyPointerType()) + return state; + // Assume different address spaces cannot overlap. if (First.Expression->getType()->getPointeeType().getAddressSpace() != Second.Expression->getType()->getPointeeType().getAddressSpace()) diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 785cdfa..4e472b7 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -1814,6 +1814,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::OMPStripeDirectiveClass: case Stmt::OMPTileDirectiveClass: case Stmt::OMPInterchangeDirectiveClass: + case Stmt::OMPFuseDirectiveClass: case Stmt::OMPInteropDirectiveClass: case Stmt::OMPDispatchDirectiveClass: case Stmt::OMPMaskedDirectiveClass: diff --git a/clang/lib/Tooling/InterpolatingCompilationDatabase.cpp b/clang/lib/Tooling/InterpolatingCompilationDatabase.cpp index 995019c..2856842 100644 --- a/clang/lib/Tooling/InterpolatingCompilationDatabase.cpp +++ b/clang/lib/Tooling/InterpolatingCompilationDatabase.cpp @@ -123,6 +123,15 @@ static types::ID foldType(types::ID Lang) { } } +// Return the language standard that's activated by the /std:c++latest +// flag in clang-CL mode. +static LangStandard::Kind latestLangStandard() { + // FIXME: Have a single source of truth for the mapping from + // c++latest --> c++26 that's shared by the driver code + // (clang/lib/Driver/ToolChains/Clang.cpp) and this file. + return LangStandard::lang_cxx26; +} + // A CompileCommand that can be applied to another file. struct TransferableCommand { // Flags that should not apply to all files are stripped from CommandLine. @@ -237,9 +246,16 @@ struct TransferableCommand { // --std flag may only be transferred if the language is the same. // We may consider "translating" these, e.g. c++11 -> c11. if (Std != LangStandard::lang_unspecified && foldType(TargetType) == Type) { - Result.CommandLine.emplace_back(( - llvm::Twine(ClangCLMode ? "/std:" : "-std=") + - LangStandard::getLangStandardForKind(Std).getName()).str()); + const char *Spelling = + LangStandard::getLangStandardForKind(Std).getName(); + // In clang-cl mode, the latest standard is spelled 'c++latest' rather + // than e.g. 'c++26', and the driver does not accept the latter, so emit + // the spelling that the driver does accept. + if (ClangCLMode && Std == latestLangStandard()) { + Spelling = "c++latest"; + } + Result.CommandLine.emplace_back( + (llvm::Twine(ClangCLMode ? "/std:" : "-std=") + Spelling).str()); } Result.CommandLine.push_back("--"); Result.CommandLine.push_back(std::string(Filename)); @@ -296,8 +312,14 @@ private: // Try to interpret the argument as '-std='. std::optional<LangStandard::Kind> tryParseStdArg(const llvm::opt::Arg &Arg) { using namespace driver::options; - if (Arg.getOption().matches(ClangCLMode ? OPT__SLASH_std : OPT_std_EQ)) + if (Arg.getOption().matches(ClangCLMode ? OPT__SLASH_std : OPT_std_EQ)) { + // "c++latest" is not a recognized LangStandard, but it's accepted by + // the clang driver in CL mode. + if (ClangCLMode && StringRef(Arg.getValue()) == "c++latest") { + return latestLangStandard(); + } return LangStandard::getLangKind(Arg.getValue()); + } return std::nullopt; } }; |