Diffstat (limited to 'clang')
113 files changed, 6466 insertions, 475 deletions
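The main change in this commit is parsing, semantic analysis, and AST support for the OpenMP 'fuse' loop-transformation directive and its 'looprange' clause. As an orientation sketch only (not taken from the patch; loop bounds and bodies are illustrative), the source syntax the new support accepts looks like:

    void f(int n, double *a, double *b, double *c) {
      #pragma omp fuse looprange(1, 2)   /* fuse the first two loops of the sequence */
      {
        for (int i = 0; i < n; ++i) a[i] = 0.0;
        for (int j = 0; j < n; ++j) b[j] = 1.0;
        for (int k = 0; k < n; ++k) c[k] = 2.0;  /* outside the selected range, left as-is */
      }
    }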
diff --git a/clang/bindings/python/clang/cindex.py b/clang/bindings/python/clang/cindex.py index c44e646..80140d2 100644 --- a/clang/bindings/python/clang/cindex.py +++ b/clang/bindings/python/clang/cindex.py @@ -1446,6 +1446,9 @@ class CursorKind(BaseEnumeration): # OpenMP stripe directive. OMP_STRIPE_DIRECTIVE = 310 + # OpenMP fuse directive. + OMP_FUSE_DIRECTIVE = 311 + # OpenACC Compute Construct. OPEN_ACC_COMPUTE_DIRECTIVE = 320 diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index 6108e54..68ca7be 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -482,6 +482,8 @@ implementation. +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| loop transformation apply clause | :none:`unclaimed` | :none:`unclaimed` | |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| loop fuse transformation | :good:`done` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| workdistribute construct | | :none:`in progress` | @skc7, @mjklemm |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| task_iteration | :none:`unclaimed` | :none:`unclaimed` | |
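The 'loop fuse transformation' row added above is the feature this patch implements. Conceptually, fusion rewrites a canonical loop sequence into a single loop; a rough source-level sketch (assuming identical trip counts for simplicity; handling of differing trip counts is not shown) is:

    /* Original loop sequence: */
    #pragma omp fuse
    {
      for (int i = 0; i < n; ++i) a[i] += 1;
      for (int j = 0; j < n; ++j) b[j] += 1;
    }

    /* Roughly equivalent fused form: */
    for (int k = 0; k < n; ++k) {
      a[k] += 1;
      b[k] += 1;
    }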
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 98c889c..79dc0b2 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -432,6 +432,9 @@ Bug Fixes to C++ Support - Fix an assertion failure when taking the address on a non-type template parameter argument of object type. (#GH151531) - Suppress ``-Wdouble-promotion`` when explicitly asked for with C++ list initialization (#GH33409). +- Fix the result of `__builtin_is_implicit_lifetime` for types with a user-provided constructor. (#GH160610) +- Correctly deduce return types in ``decltype`` expressions. (#GH160497) (#GH56652) (#GH116319) (#GH161196) +- Fixed a crash in the pre-C++23 warning for attributes before a lambda declarator (#GH161070). Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -599,6 +602,7 @@ OpenMP Support - Added support for ``defaultmap`` directive implicit-behavior ``storage``. - Added support for ``defaultmap`` directive implicit-behavior ``private``. - Added parsing and semantic analysis support for ``groupprivate`` directive. +- Added support for 'omp fuse' directive. Improvements ^^^^^^^^^^^^ diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index be038d9..f13d9c9 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -2162,6 +2162,10 @@ enum CXCursorKind { */ CXCursor_OMPStripeDirective = 310, + /** OpenMP fuse directive + */ + CXCursor_OMPFuseDirective = 311, + /** OpenACC Compute Construct. */ CXCursor_OpenACCComputeConstruct = 320, diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 42b4268..68d220a 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -1149,6 +1149,80 @@ public: static OMPFullClause *CreateEmpty(const ASTContext &C); }; +/// This class represents the 'looprange' clause in the +/// '#pragma omp fuse' directive +/// +/// \code {c} +/// #pragma omp fuse looprange(1,2) +/// { +/// for(int i = 0; i < 64; ++i) +/// for(int j = 0; j < 256; j+=2) +/// for(int k = 127; k >= 0; --k) +/// \endcode +class OMPLoopRangeClause final : public OMPClause { + friend class OMPClauseReader; + /// Location of '(' + SourceLocation LParenLoc; + + /// Location of first and count expressions + SourceLocation FirstLoc, CountLoc; + + /// Number of looprange arguments (always 2: first, count) + enum { FirstExpr, CountExpr, NumArgs }; + Stmt *Args[NumArgs] = {nullptr, nullptr}; + + /// Set looprange 'first' expression + void setFirst(Expr *E) { Args[FirstExpr] = E; } + + /// Set looprange 'count' expression + void setCount(Expr *E) { Args[CountExpr] = E; } + + /// Build an empty clause for deserialization. + explicit OMPLoopRangeClause() + : OMPClause(llvm::omp::OMPC_looprange, {}, {}) {} + +public: + /// Build a 'looprange' clause AST node. + static OMPLoopRangeClause * + Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation FirstLoc, SourceLocation CountLoc, + SourceLocation EndLoc, Expr *First, Expr *Count); + + /// Build an empty 'looprange' clause node. 
+ static OMPLoopRangeClause *CreateEmpty(const ASTContext &C); + + // Location getters/setters + SourceLocation getLParenLoc() const { return LParenLoc; } + SourceLocation getFirstLoc() const { return FirstLoc; } + SourceLocation getCountLoc() const { return CountLoc; } + + void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } + void setFirstLoc(SourceLocation Loc) { FirstLoc = Loc; } + void setCountLoc(SourceLocation Loc) { CountLoc = Loc; } + + /// Get looprange 'first' expression + Expr *getFirst() const { return cast_or_null<Expr>(Args[FirstExpr]); } + + /// Get looprange 'count' expression + Expr *getCount() const { return cast_or_null<Expr>(Args[CountExpr]); } + + child_range children() { return child_range(Args, Args + NumArgs); } + const_child_range children() const { + return const_child_range(Args, Args + NumArgs); + } + + child_range used_children() { + return child_range(child_iterator(), child_iterator()); + } + const_child_range used_children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + static bool classof(const OMPClause *T) { + return T->getClauseKind() == llvm::omp::OMPC_looprange; + } +}; + /// Representation of the 'partial' clause of the '#pragma omp unroll' /// directive. /// diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index af1a073..7a2881f 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3177,6 +3177,9 @@ DEF_TRAVERSE_STMT(OMPUnrollDirective, DEF_TRAVERSE_STMT(OMPReverseDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) +DEF_TRAVERSE_STMT(OMPFuseDirective, + { TRY_TO(TraverseOMPExecutableDirective(S)); }) + DEF_TRAVERSE_STMT(OMPInterchangeDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) @@ -3495,6 +3498,14 @@ bool RecursiveASTVisitor<Derived>::VisitOMPFullClause(OMPFullClause *C) { } template <typename Derived> +bool RecursiveASTVisitor<Derived>::VisitOMPLoopRangeClause( + OMPLoopRangeClause *C) { + TRY_TO(TraverseStmt(C->getFirst())); + TRY_TO(TraverseStmt(C->getCount())); + return true; +} + +template <typename Derived> bool RecursiveASTVisitor<Derived>::VisitOMPPartialClause(OMPPartialClause *C) { TRY_TO(TraverseStmt(C->getFactor())); return true; diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index d9f87f1..bc6aeaa8 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -21,6 +21,7 @@ #include "clang/AST/StmtCXX.h" #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/SourceLocation.h" +#include "llvm/Support/Casting.h" namespace clang { @@ -677,6 +678,10 @@ public: } }; +// Forward declaration of a generic loop transformation. Used in the declaration +// of OMPLoopBasedDirective. +class OMPLoopTransformationDirective; + /// The base class for all loop-based directives, including loop transformation /// directives. class OMPLoopBasedDirective : public OMPExecutableDirective { @@ -889,24 +894,23 @@ public: /// Calls the specified callback function for all the loops in \p CurStmt, /// from the outermost to the innermost. 
- static bool doForAllLoops( - Stmt *CurStmt, bool TryImperfectlyNestedLoops, unsigned NumLoops, - llvm::function_ref<bool(unsigned, Stmt *)> Callback, - llvm::function_ref<void(OMPCanonicalLoopNestTransformationDirective *)> - OnTransformationCallback); + static bool + doForAllLoops(Stmt *CurStmt, bool TryImperfectlyNestedLoops, + unsigned NumLoops, + llvm::function_ref<bool(unsigned, Stmt *)> Callback, + llvm::function_ref<void(OMPLoopTransformationDirective *)> + OnTransformationCallback); static bool doForAllLoops(const Stmt *CurStmt, bool TryImperfectlyNestedLoops, unsigned NumLoops, llvm::function_ref<bool(unsigned, const Stmt *)> Callback, - llvm::function_ref< - void(const OMPCanonicalLoopNestTransformationDirective *)> + llvm::function_ref<void(const OMPLoopTransformationDirective *)> OnTransformationCallback) { auto &&NewCallback = [Callback](unsigned Cnt, Stmt *CurStmt) { return Callback(Cnt, CurStmt); }; auto &&NewTransformCb = - [OnTransformationCallback]( - OMPCanonicalLoopNestTransformationDirective *A) { + [OnTransformationCallback](OMPLoopTransformationDirective *A) { OnTransformationCallback(A); }; return doForAllLoops(const_cast<Stmt *>(CurStmt), TryImperfectlyNestedLoops, @@ -919,7 +923,7 @@ public: doForAllLoops(Stmt *CurStmt, bool TryImperfectlyNestedLoops, unsigned NumLoops, llvm::function_ref<bool(unsigned, Stmt *)> Callback) { - auto &&TransformCb = [](OMPCanonicalLoopNestTransformationDirective *) {}; + auto &&TransformCb = [](OMPLoopTransformationDirective *) {}; return doForAllLoops(CurStmt, TryImperfectlyNestedLoops, NumLoops, Callback, TransformCb); } @@ -957,9 +961,11 @@ public: }; /// Common class of data shared between -/// OMPCanonicalLoopNestTransformationDirective and transformations over -/// canonical loop sequences. +/// OMPCanonicalLoopNestTransformationDirective and +/// OMPCanonicalLoopSequenceTransformationDirective class OMPLoopTransformationDirective { + friend class ASTStmtReader; + /// Number of (top-level) generated loops. /// This value is 1 for most transformations as they only map one loop nest /// into another. @@ -969,15 +975,39 @@ class OMPLoopTransformationDirective { /// generate more than one loop nest, so the value would be >= 1. unsigned NumGeneratedTopLevelLoops = 1; + /// We need this because we cannot easily make OMPLoopTransformationDirective + /// a proper Stmt. + Stmt *S = nullptr; + protected: void setNumGeneratedTopLevelLoops(unsigned N) { NumGeneratedTopLevelLoops = N; } + explicit OMPLoopTransformationDirective(Stmt *S) : S(S) {} + public: unsigned getNumGeneratedTopLevelLoops() const { return NumGeneratedTopLevelLoops; } + + /// Returns the specific directive related to this loop transformation. + Stmt *getDirective() const { return S; } + + /// Get the de-sugared statements after the loop transformation. + /// + /// Might be nullptr if either the directive generates no loops and is handled + /// directly in CodeGen, or resolving a template-dependence context is + /// required. + Stmt *getTransformedStmt() const; + + /// Return preinits statement. + Stmt *getPreInits() const; + + static bool classof(const Stmt *T) { + return isa<OMPCanonicalLoopNestTransformationDirective, + OMPCanonicalLoopSequenceTransformationDirective>(T); + } }; /// The base class for all transformation directives of canonical loop nests. 
@@ -990,7 +1020,8 @@ protected: explicit OMPCanonicalLoopNestTransformationDirective( StmtClass SC, OpenMPDirectiveKind Kind, SourceLocation StartLoc, SourceLocation EndLoc, unsigned NumAssociatedLoops) - : OMPLoopBasedDirective(SC, Kind, StartLoc, EndLoc, NumAssociatedLoops) {} + : OMPLoopBasedDirective(SC, Kind, StartLoc, EndLoc, NumAssociatedLoops), + OMPLoopTransformationDirective(this) {} public: /// Return the number of associated (consumed) loops. @@ -5928,6 +5959,112 @@ public: } }; +/// The base class for all transformation directives of canonical loop +/// sequences (currently only 'fuse') +class OMPCanonicalLoopSequenceTransformationDirective + : public OMPExecutableDirective, + public OMPLoopTransformationDirective { + friend class ASTStmtReader; + +protected: + explicit OMPCanonicalLoopSequenceTransformationDirective( + StmtClass SC, OpenMPDirectiveKind Kind, SourceLocation StartLoc, + SourceLocation EndLoc) + : OMPExecutableDirective(SC, Kind, StartLoc, EndLoc), + OMPLoopTransformationDirective(this) {} + +public: + /// Get the de-sugared statements after the loop transformation. + /// + /// Might be nullptr if either the directive generates no loops and is handled + /// directly in CodeGen, or resolving a template-dependence context is + /// required. + Stmt *getTransformedStmt() const; + + /// Return preinits statement. + Stmt *getPreInits() const; + + static bool classof(const Stmt *T) { + Stmt::StmtClass C = T->getStmtClass(); + return C == OMPFuseDirectiveClass; + } +}; + +/// Represents the '#pragma omp fuse' loop transformation directive +/// +/// \code{c} +/// #pragma omp fuse +/// { +/// for(int i = 0; i < m1; ++i) {...} +/// for(int j = 0; j < m2; ++j) {...} +/// ... +/// } +/// \endcode +class OMPFuseDirective final + : public OMPCanonicalLoopSequenceTransformationDirective { + friend class ASTStmtReader; + friend class OMPExecutableDirective; + + // Offsets of child members. + enum { + PreInitsOffset = 0, + TransformedStmtOffset, + }; + + explicit OMPFuseDirective(SourceLocation StartLoc, SourceLocation EndLoc) + : OMPCanonicalLoopSequenceTransformationDirective( + OMPFuseDirectiveClass, llvm::omp::OMPD_fuse, StartLoc, EndLoc) {} + + void setPreInits(Stmt *PreInits) { + Data->getChildren()[PreInitsOffset] = PreInits; + } + + void setTransformedStmt(Stmt *S) { + Data->getChildren()[TransformedStmtOffset] = S; + } + +public: + /// Create a new AST node representation for #pragma omp fuse' + /// + /// \param C Context of the AST + /// \param StartLoc Location of the introducer (e.g the 'omp' token) + /// \param EndLoc Location of the directive's end (e.g the tok::eod) + /// \param Clauses The directive's clauses + /// \param NumLoops Total number of loops in the canonical loop sequence. + /// \param NumGeneratedTopLevelLoops Number of top-level generated loops. + // Typically 1 but looprange clause can + // change this. 
+ /// \param AssociatedStmt The outermost associated loop + /// \param TransformedStmt The loop nest after fusion, or nullptr in + /// dependent + /// \param PreInits Helper preinits statements for the loop nest + static OMPFuseDirective * + Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + ArrayRef<OMPClause *> Clauses, unsigned NumGeneratedTopLevelLoops, + Stmt *AssociatedStmt, Stmt *TransformedStmt, Stmt *PreInits); + + /// Build an empty '#pragma omp fuse' AST node for deserialization + /// + /// \param C Context of the AST + /// \param NumClauses Number of clauses to allocate + /// \param NumLoops Number of top level loops to allocate + static OMPFuseDirective *CreateEmpty(const ASTContext &C, + unsigned NumClauses); + + /// Gets the associated loops after the transformation. This is the de-sugared + /// replacement or nulltpr in dependent contexts. + Stmt *getTransformedStmt() const { + return Data->getChildren()[TransformedStmtOffset]; + } + + /// Return preinits statement. + Stmt *getPreInits() const { return Data->getChildren()[PreInitsOffset]; } + + static bool classof(const Stmt *T) { + return T->getStmtClass() == OMPFuseDirectiveClass; + } +}; + /// This represents '#pragma omp scan' directive. /// /// \code @@ -6596,4 +6733,37 @@ public: } // end namespace clang +namespace llvm { +// Allow a Stmt* be casted correctly to an OMPLoopTransformationDirective*. +// The default routines would just use a C-style cast which won't work well +// for the multiple inheritance here. We have to use a static cast from the +// corresponding subclass. +template <> +struct CastInfo<clang::OMPLoopTransformationDirective, clang::Stmt *> + : public NullableValueCastFailed<clang::OMPLoopTransformationDirective *>, + public DefaultDoCastIfPossible< + clang::OMPLoopTransformationDirective *, clang::Stmt *, + CastInfo<clang::OMPLoopTransformationDirective, clang::Stmt *>> { + static bool isPossible(const clang::Stmt *T) { + return clang::OMPLoopTransformationDirective::classof(T); + } + + static clang::OMPLoopTransformationDirective *doCast(clang::Stmt *T) { + if (auto *D = + dyn_cast<clang::OMPCanonicalLoopNestTransformationDirective>(T)) + return static_cast<clang::OMPLoopTransformationDirective *>(D); + if (auto *D = + dyn_cast<clang::OMPCanonicalLoopSequenceTransformationDirective>(T)) + return static_cast<clang::OMPLoopTransformationDirective *>(D); + llvm_unreachable("unexpected type"); + } +}; +template <> +struct CastInfo<clang::OMPLoopTransformationDirective, const clang::Stmt *> + : public ConstStrippingForwardingCast< + clang::OMPLoopTransformationDirective, const clang::Stmt *, + CastInfo<clang::OMPLoopTransformationDirective, clang::Stmt *>> {}; + +} // namespace llvm + #endif diff --git a/clang/include/clang/Basic/AMDGPUTypes.def b/clang/include/clang/Basic/AMDGPUTypes.def index d3dff446..089a72b 100644 --- a/clang/include/clang/Basic/AMDGPUTypes.def +++ b/clang/include/clang/Basic/AMDGPUTypes.def @@ -21,6 +21,7 @@ #endif AMDGPU_OPAQUE_PTR_TYPE("__amdgpu_buffer_rsrc_t", AMDGPUBufferRsrc, AMDGPUBufferRsrcTy, 128, 128, 8) +AMDGPU_OPAQUE_PTR_TYPE("__amdgpu_texture_t", AMDGPUTexture, AMDGPUTextureTy, 256, 256, 0) AMDGPU_NAMED_BARRIER_TYPE("__amdgpu_named_workgroup_barrier_t", AMDGPUNamedWorkgroupBarrier, AMDGPUNamedWorkgroupBarrierTy, 128, 32, 0) diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def index 9aad00b..b856ad1 100644 --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -34,6 
+34,7 @@ // Q -> target builtin type, followed by a character to distinguish the builtin type // Qa -> AArch64 svcount_t builtin type. // Qb -> AMDGPU __amdgpu_buffer_rsrc_t builtin type. +// Qt -> AMDGPU __amdgpu_texture_t builtin type. // E -> ext_vector, followed by the number of elements and the base type. // X -> _Complex, followed by the base type. // Y -> ptrdiff_t diff --git a/clang/include/clang/Basic/Diagnostic.h b/clang/include/clang/Basic/Diagnostic.h index af26a04..e540040 100644 --- a/clang/include/clang/Basic/Diagnostic.h +++ b/clang/include/clang/Basic/Diagnostic.h @@ -25,6 +25,7 @@ #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Support/Compiler.h" #include <cassert> @@ -1367,6 +1368,22 @@ inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &DB, } inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &DB, + const llvm::APSInt &Int) { + DB.AddString(toString(Int, /*Radix=*/10, Int.isSigned(), + /*formatAsCLiteral=*/false, + /*UpperCase=*/true, /*InsertSeparators=*/true)); + return DB; +} + +inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &DB, + const llvm::APInt &Int) { + DB.AddString(toString(Int, /*Radix=*/10, /*Signed=*/false, + /*formatAsCLiteral=*/false, + /*UpperCase=*/true, /*InsertSeparators=*/true)); + return DB; +} + +inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &DB, int I) { DB.AddTaggedVal(I, DiagnosticsEngine::ak_sint); return DB; diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index 4d9e123..c724136 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1141,7 +1141,7 @@ def warn_cxx23_compat_binding_pack : Warning< def err_capture_default_first : Error< "capture default must be first">; def ext_decl_attrs_on_lambda : ExtWarn< - "%select{an attribute specifier sequence|%0}1 in this position " + "%select{an attribute specifier sequence|%1}0 in this position " "is a C++23 extension">, InGroup<CXX23AttrsOnLambda>; def ext_lambda_missing_parens : ExtWarn< "lambda without a parameter clause is a C++23 extension">, diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index dc4c6d3..b157cbb 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -5770,8 +5770,10 @@ def err_template_recursion_depth_exceeded : Error< def err_constraint_depends_on_self : Error<"satisfaction of constraint %0 depends on itself">, NoSFINAE; -def note_template_recursion_depth : Note< - "use -ftemplate-depth=N to increase recursive template instantiation depth">; +def note_template_recursion_depth + : Note<"use -ftemplate-depth=N to increase recursive template " + "instantiation depth">, + NoSFINAE; def err_template_instantiate_within_definition : Error< "%select{implicit|explicit}0 instantiation of template %1 within its" @@ -11761,6 +11763,18 @@ def note_omp_implicit_dsa : Note< "implicitly determined as %0">; def err_omp_loop_var_dsa : Error< "loop iteration variable in the associated loop of 'omp %1' directive may not be %0, predetermined as %2">; +def err_omp_not_a_loop_sequence + : Error<"statement after '#pragma omp %0' must be a loop sequence " + "containing canonical loops or loop-generating 
constructs">; +def err_omp_empty_loop_sequence + : Error<"loop sequence after '#pragma omp %0' must contain at least 1 " + "canonical loop or loop-generating construct">; +def err_omp_invalid_looprange + : Error<"looprange clause selects loops from %1 to %2 but this exceeds the " + "number of loops (%3) in the loop sequence">; +def warn_omp_redundant_fusion : Warning<"looprange clause selects a single " + "loop, resulting in redundant fusion">, + InGroup<OpenMPClauses>; def err_omp_not_for : Error< "%select{statement after '#pragma omp %1' must be a for loop|" "expected %2 for loops after '#pragma omp %1'%select{|, but found only %4}3}0">; diff --git a/clang/include/clang/Basic/OpenMPKinds.h b/clang/include/clang/Basic/OpenMPKinds.h index 4c988e0..ed89a31 100644 --- a/clang/include/clang/Basic/OpenMPKinds.h +++ b/clang/include/clang/Basic/OpenMPKinds.h @@ -391,6 +391,13 @@ bool isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind); bool isOpenMPCanonicalLoopNestTransformationDirective( OpenMPDirectiveKind DKind); +/// Checks if the specified directive is a loop transformation directive that +/// applies to a canonical loop sequence. +/// \param DKind Specified directive. +/// \return True iff the directive is a loop transformation. +bool isOpenMPCanonicalLoopSequenceTransformationDirective( + OpenMPDirectiveKind DKind); + /// Checks if the specified directive is a loop transformation directive. /// \param DKind Specified directive. /// \return True iff the directive is a loop transformation. diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td index dd1a244..bf3686b 100644 --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -238,6 +238,10 @@ def OMPUnrollDirective : StmtNode<OMPCanonicalLoopNestTransformationDirective>; def OMPReverseDirective : StmtNode<OMPCanonicalLoopNestTransformationDirective>; def OMPInterchangeDirective : StmtNode<OMPCanonicalLoopNestTransformationDirective>; +def OMPCanonicalLoopSequenceTransformationDirective + : StmtNode<OMPExecutableDirective, 1>; +def OMPFuseDirective + : StmtNode<OMPCanonicalLoopSequenceTransformationDirective>; def OMPForDirective : StmtNode<OMPLoopDirective>; def OMPForSimdDirective : StmtNode<OMPLoopDirective>; def OMPSectionsDirective : StmtNode<OMPExecutableDirective>; diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index bb39444..e1be08c 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -683,8 +683,8 @@ def CIR_ConditionOp : CIR_Op<"condition", [ //===----------------------------------------------------------------------===// defvar CIR_YieldableScopes = [ - "ArrayCtor", "ArrayDtor", "CaseOp", "DoWhileOp", "ForOp", "IfOp", "ScopeOp", - "SwitchOp", "TernaryOp", "WhileOp" + "ArrayCtor", "ArrayDtor", "CaseOp", "DoWhileOp", "ForOp", "GlobalOp", "IfOp", + "ScopeOp", "SwitchOp", "TernaryOp", "WhileOp" ]; def CIR_YieldOp : CIR_Op<"yield", [ @@ -1776,7 +1776,9 @@ def CIR_GlobalLinkageKind : CIR_I32EnumAttr< // is upstreamed. 
def CIR_GlobalOp : CIR_Op<"global", [ - DeclareOpInterfaceMethods<CIRGlobalValueInterface> + DeclareOpInterfaceMethods<RegionBranchOpInterface>, + DeclareOpInterfaceMethods<CIRGlobalValueInterface>, + NoRegionArguments ]> { let summary = "Declare or define a global variable"; let description = [{ @@ -1807,6 +1809,9 @@ def CIR_GlobalOp : CIR_Op<"global", [ UnitAttr:$dso_local, OptionalAttr<I64Attr>:$alignment); + let regions = (region MaxSizedRegion<1>:$ctorRegion, + MaxSizedRegion<1>:$dtorRegion); + let assemblyFormat = [{ ($sym_visibility^)? (`` $global_visibility^)? @@ -1815,24 +1820,34 @@ def CIR_GlobalOp : CIR_Op<"global", [ (`comdat` $comdat^)? (`dso_local` $dso_local^)? $sym_name - custom<GlobalOpTypeAndInitialValue>($sym_type, $initial_value) + custom<GlobalOpTypeAndInitialValue>($sym_type, $initial_value, + $ctorRegion, $dtorRegion) attr-dict }]; let extraClassDeclaration = [{ - bool isDeclaration() { return !getInitialValue(); } + bool isDeclaration() { + return !getInitialValue() && getCtorRegion().empty() && getDtorRegion().empty(); + } bool hasInitializer() { return !isDeclaration(); } }]; let skipDefaultBuilders = 1; - let builders = [OpBuilder<(ins - "llvm::StringRef":$sym_name, - "mlir::Type":$sym_type, - CArg<"bool", "false">:$isConstant, - // CIR defaults to external linkage. - CArg<"cir::GlobalLinkageKind", - "cir::GlobalLinkageKind::ExternalLinkage">:$linkage)>]; + let builders = [ + OpBuilder<(ins + "llvm::StringRef":$sym_name, + "mlir::Type":$sym_type, + CArg<"bool", "false">:$isConstant, + // CIR defaults to external linkage. + CArg<"cir::GlobalLinkageKind", + "cir::GlobalLinkageKind::ExternalLinkage">:$linkage, + CArg<"llvm::function_ref<void(mlir::OpBuilder &, mlir::Location)>", + "nullptr">:$ctorBuilder, + CArg<"llvm::function_ref<void(mlir::OpBuilder &, mlir::Location)>", + "nullptr">:$dtorBuilder) + > + ]; let hasVerifier = 1; diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index 0fac1b2..7e59989 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -208,6 +208,7 @@ struct MissingFeatures { static bool dataLayoutTypeAllocSize() { return false; } static bool dataLayoutTypeStoreSize() { return false; } static bool deferredCXXGlobalInit() { return false; } + static bool deleteArray() { return false; } static bool devirtualizeMemberFunction() { return false; } static bool ehCleanupFlags() { return false; } static bool ehCleanupScope() { return false; } @@ -219,6 +220,7 @@ struct MissingFeatures { static bool emitCondLikelihoodViaExpectIntrinsic() { return false; } static bool emitLifetimeMarkers() { return false; } static bool emitLValueAlignmentAssumption() { return false; } + static bool emitNullCheckForDeleteCalls() { return false; } static bool emitNullabilityCheck() { return false; } static bool emitTypeCheck() { return false; } static bool emitTypeMetadataCodeForVCall() { return false; } diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 30edd30..e301cf1 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -6767,6 +6767,9 @@ private: OpenMPClauseKind Kind, bool ParseOnly); + /// Parses the 'looprange' clause of a '#pragma omp fuse' directive. + OMPClause *ParseOpenMPLoopRangeClause(); + /// Parses the 'sizes' clause of a '#pragma omp tile' directive. 
OMPClause *ParseOpenMPSizesClause(); diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 2bd6be2..f53aafd 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -13335,8 +13335,6 @@ public: Sema &SemaRef; bool Invalid; bool AlreadyInstantiating; - bool CheckInstantiationDepth(SourceLocation PointOfInstantiation, - SourceRange InstantiationRange); InstantiatingTemplate(Sema &SemaRef, CodeSynthesisContext::SynthesisKind Kind, @@ -13529,7 +13527,7 @@ public: ~ArgPackSubstIndexRAII() { Self.ArgPackSubstIndex = OldSubstIndex; } }; - void pushCodeSynthesisContext(CodeSynthesisContext Ctx); + bool pushCodeSynthesisContext(CodeSynthesisContext Ctx); void popCodeSynthesisContext(); void PrintContextStack(InstantiationContextDiagFuncRef DiagFunc) { diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index c0fd7a6..daf58b1 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -463,6 +463,13 @@ public: Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc); + + /// Called on well-formed '#pragma omp fuse' after parsing of its + /// clauses and the associated statement. + StmtResult ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp for' after parsing /// of the associated statement. StmtResult @@ -921,6 +928,12 @@ public: SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc); + + /// Called on well-form 'looprange' clause after parsing its arguments. + OMPClause * + ActOnOpenMPLoopRangeClause(Expr *First, Expr *Count, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation FirstLoc, + SourceLocation CountLoc, SourceLocation EndLoc); /// Called on well-formed 'ordered' clause. OMPClause * ActOnOpenMPOrderedClause(SourceLocation StartLoc, SourceLocation EndLoc, @@ -1485,7 +1498,81 @@ private: bool checkTransformableLoopNest( OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops, SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers, - Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits); + Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *>> &OriginalInits); + + /// Holds the result of the analysis of a (possibly canonical) loop. + struct LoopAnalysis { + /// The analyzed loop or loop transformation. + Stmt *AStmt = nullptr; + /// Loop analyses results. + OMPLoopBasedDirective::HelperExprs HelperExprs; + /// The for-statement of the loop. TheForStmt equals AStmt only when the + /// latter is a canonical loop (i.e. not a loop transformation). + Stmt *TheForStmt = nullptr; + /// Initialization statements before transformations. + SmallVector<Stmt *> OriginalInits; + /// Initialization statements required after transformation of this loop. + SmallVector<Stmt *> TransformsPreInits; + + explicit LoopAnalysis(Stmt *S) : AStmt(S) {} + + bool isRegularLoop() const { return isRegularLoop(AStmt); } + bool isLoopTransformation() const { return isLoopTransformation(AStmt); } + + // Convenience functions used when building LoopSequenceAnalysis. + static bool isRegularLoop(Stmt *S) { + return isa<ForStmt, CXXForRangeStmt>(S); + } + static bool isLoopTransformation(Stmt *S) { + return isa<OMPLoopTransformationDirective>(S); + } + }; + + /// Holds the result of the analysis of a (possibly canonical) loop sequence. + struct LoopSequenceAnalysis { + /// Number of top level canonical loops. 
+ unsigned LoopSeqSize = 0; + /// For each loop results of the analysis. + SmallVector<LoopAnalysis, 2> Loops; + /// Additional code required before entering the transformed loop sequence. + SmallVector<Stmt *> LoopSequencePreInits; + + // Convenience function used when building the LoopSequenceAnalysis. + static bool isLoopSequenceDerivation(Stmt *S) { + return LoopAnalysis::isRegularLoop(S) || + LoopAnalysis::isLoopTransformation(S); + } + }; + + /// The main recursive process of `checkTransformableLoopSequence` that + /// performs grammatical parsing of a canonical loop sequence. It extracts + /// key information, such as the number of top-level loops, loop statements, + /// helper expressions, and other relevant loop-related data, all in a single + /// execution to avoid redundant traversals. This analysis flattens inner + /// Loop Sequences + /// + /// \param LoopSeqStmt The AST of the original statement. + /// \param SeqAnalysis [out] Result of the analysis of \p LoopSeqStmt + /// \param Context + /// \param Kind The loop transformation directive kind. + /// \return Whether the original statement is both syntactically and + /// semantically correct according to OpenMP 6.0 canonical loop + /// sequence definition. + bool analyzeLoopSequence(Stmt *LoopSeqStmt, LoopSequenceAnalysis &SeqAnalysis, + ASTContext &Context, OpenMPDirectiveKind Kind); + + /// Validates and checks whether a loop sequence can be transformed according + /// to the given directive, providing necessary setup and initialization + /// (Driver function) before recursion using `analyzeLoopSequence`. + /// + /// \param Kind The loop transformation directive kind. + /// \param AStmt The AST of the original statement + /// \param SeqAnalysis [out] Result of the analysis of \p LoopSeqStmt + /// \param Context + /// \return Whether there was an absence of errors or not + bool checkTransformableLoopSequence(OpenMPDirectiveKind Kind, Stmt *AStmt, + LoopSequenceAnalysis &SeqAnalysis, + ASTContext &Context); /// Helper to keep information about the current `omp begin/end declare /// variant` nesting. 
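The new SemaOpenMP entry points above (ActOnOpenMPFuseDirective, ActOnOpenMPLoopRangeClause, analyzeLoopSequence, checkTransformableLoopSequence) drive the diagnostics added to DiagnosticSemaKinds.td earlier in this patch. A hedged sketch of inputs expected to trigger them (diagnostic text paraphrased from the .td entries; names illustrative):

    #pragma omp fuse                   /* err_omp_not_a_loop_sequence: the statement after  */
    x = 1;                             /* '#pragma omp fuse' is not a loop sequence         */

    #pragma omp fuse looprange(2, 3)   /* err_omp_invalid_looprange: selects loops 2..4,    */
    {                                  /* exceeding the 2 loops in the sequence             */
      for (int i = 0; i < n; ++i) { }
      for (int j = 0; j < n; ++j) { }
    }

    #pragma omp fuse looprange(1, 1)   /* warn_omp_redundant_fusion: a single loop is       */
    {                                  /* selected, so the fusion is redundant              */
      for (int i = 0; i < n; ++i) { }
      for (int j = 0; j < n; ++j) { }
    }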
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 441047d..99864c7 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1951,6 +1951,7 @@ enum StmtCode { STMT_OMP_UNROLL_DIRECTIVE, STMT_OMP_REVERSE_DIRECTIVE, STMT_OMP_INTERCHANGE_DIRECTIVE, + STMT_OMP_FUSE_DIRECTIVE, STMT_OMP_FOR_DIRECTIVE, STMT_OMP_FOR_SIMD_DIRECTIVE, STMT_OMP_SECTIONS_DIRECTIVE, diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 61dd330..0fd0e7e 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -12590,6 +12590,10 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context, Type = Context.AMDGPUBufferRsrcTy; break; } + case 't': { + Type = Context.AMDGPUTextureTy; + break; + } default: llvm_unreachable("Unexpected target builtin type"); } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 891344d..a2e97fc 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -1294,95 +1294,6 @@ static bool interp__builtin_assume_aligned(InterpState &S, CodePtr OpPC, return true; } -static bool interp__builtin_ia32_bextr(InterpState &S, CodePtr OpPC, - const InterpFrame *Frame, - const CallExpr *Call) { - if (Call->getNumArgs() != 2 || !Call->getArg(0)->getType()->isIntegerType() || - !Call->getArg(1)->getType()->isIntegerType()) - return false; - - APSInt Index = popToAPSInt(S, Call->getArg(1)); - APSInt Val = popToAPSInt(S, Call->getArg(0)); - - unsigned BitWidth = Val.getBitWidth(); - uint64_t Shift = Index.extractBitsAsZExtValue(8, 0); - uint64_t Length = Index.extractBitsAsZExtValue(8, 8); - Length = Length > BitWidth ? BitWidth : Length; - - // Handle out of bounds cases. 
- if (Length == 0 || Shift >= BitWidth) { - pushInteger(S, 0, Call->getType()); - return true; - } - - uint64_t Result = Val.getZExtValue() >> Shift; - Result &= llvm::maskTrailingOnes<uint64_t>(Length); - pushInteger(S, Result, Call->getType()); - return true; -} - -static bool interp__builtin_ia32_bzhi(InterpState &S, CodePtr OpPC, - const InterpFrame *Frame, - const CallExpr *Call) { - QualType CallType = Call->getType(); - if (Call->getNumArgs() != 2 || !Call->getArg(0)->getType()->isIntegerType() || - !Call->getArg(1)->getType()->isIntegerType() || - !CallType->isIntegerType()) - return false; - - APSInt Idx = popToAPSInt(S, Call->getArg(1)); - APSInt Val = popToAPSInt(S, Call->getArg(0)); - - unsigned BitWidth = Val.getBitWidth(); - uint64_t Index = Idx.extractBitsAsZExtValue(8, 0); - - if (Index < BitWidth) - Val.clearHighBits(BitWidth - Index); - - pushInteger(S, Val, CallType); - return true; -} - -static bool interp__builtin_ia32_pdep(InterpState &S, CodePtr OpPC, - const InterpFrame *Frame, - const CallExpr *Call) { - if (Call->getNumArgs() != 2 || !Call->getArg(0)->getType()->isIntegerType() || - !Call->getArg(1)->getType()->isIntegerType()) - return false; - - APSInt Mask = popToAPSInt(S, Call->getArg(1)); - APSInt Val = popToAPSInt(S, Call->getArg(0)); - - unsigned BitWidth = Val.getBitWidth(); - APInt Result = APInt::getZero(BitWidth); - for (unsigned I = 0, P = 0; I != BitWidth; ++I) { - if (Mask[I]) - Result.setBitVal(I, Val[P++]); - } - pushInteger(S, std::move(Result), Call->getType()); - return true; -} - -static bool interp__builtin_ia32_pext(InterpState &S, CodePtr OpPC, - const InterpFrame *Frame, - const CallExpr *Call) { - if (Call->getNumArgs() != 2 || !Call->getArg(0)->getType()->isIntegerType() || - !Call->getArg(1)->getType()->isIntegerType()) - return false; - - APSInt Mask = popToAPSInt(S, Call->getArg(1)); - APSInt Val = popToAPSInt(S, Call->getArg(0)); - - unsigned BitWidth = Val.getBitWidth(); - APInt Result = APInt::getZero(BitWidth); - for (unsigned I = 0, P = 0; I != BitWidth; ++I) { - if (Mask[I]) - Result.setBitVal(P++, Val[I]); - } - pushInteger(S, std::move(Result), Call->getType()); - return true; -} - /// (CarryIn, LHS, RHS, Result) static bool interp__builtin_ia32_addcarry_subborrow(InterpState &S, CodePtr OpPC, @@ -3275,11 +3186,37 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_bextr_u64: case clang::X86::BI__builtin_ia32_bextri_u32: case clang::X86::BI__builtin_ia32_bextri_u64: - return interp__builtin_ia32_bextr(S, OpPC, Frame, Call); + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &Val, const APSInt &Idx) { + unsigned BitWidth = Val.getBitWidth(); + uint64_t Shift = Idx.extractBitsAsZExtValue(8, 0); + uint64_t Length = Idx.extractBitsAsZExtValue(8, 8); + if (Length > BitWidth) { + Length = BitWidth; + } + + // Handle out of bounds cases. 
+ if (Length == 0 || Shift >= BitWidth) + return APInt(BitWidth, 0); + + uint64_t Result = Val.getZExtValue() >> Shift; + Result &= llvm::maskTrailingOnes<uint64_t>(Length); + return APInt(BitWidth, Result); + }); case clang::X86::BI__builtin_ia32_bzhi_si: case clang::X86::BI__builtin_ia32_bzhi_di: - return interp__builtin_ia32_bzhi(S, OpPC, Frame, Call); + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &Val, const APSInt &Idx) { + unsigned BitWidth = Val.getBitWidth(); + uint64_t Index = Idx.extractBitsAsZExtValue(8, 0); + APSInt Result = Val; + + if (Index < BitWidth) + Result.clearHighBits(BitWidth - Index); + + return Result; + }); case clang::X86::BI__builtin_ia32_lzcnt_u16: case clang::X86::BI__builtin_ia32_lzcnt_u32: @@ -3299,11 +3236,33 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_pdep_si: case clang::X86::BI__builtin_ia32_pdep_di: - return interp__builtin_ia32_pdep(S, OpPC, Frame, Call); + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &Val, const APSInt &Mask) { + unsigned BitWidth = Val.getBitWidth(); + APInt Result = APInt::getZero(BitWidth); + + for (unsigned I = 0, P = 0; I != BitWidth; ++I) { + if (Mask[I]) + Result.setBitVal(I, Val[P++]); + } + + return Result; + }); case clang::X86::BI__builtin_ia32_pext_si: case clang::X86::BI__builtin_ia32_pext_di: - return interp__builtin_ia32_pext(S, OpPC, Frame, Call); + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &Val, const APSInt &Mask) { + unsigned BitWidth = Val.getBitWidth(); + APInt Result = APInt::getZero(BitWidth); + + for (unsigned I = 0, P = 0; I != BitWidth; ++I) { + if (Mask[I]) + Result.setBitVal(P++, Val[I]); + } + + return Result; + }); case clang::X86::BI__builtin_ia32_addcarryx_u32: case clang::X86::BI__builtin_ia32_addcarryx_u64: diff --git a/clang/lib/AST/ByteCode/Pointer.h b/clang/lib/AST/ByteCode/Pointer.h index af89b66..cd738ce 100644 --- a/clang/lib/AST/ByteCode/Pointer.h +++ b/clang/lib/AST/ByteCode/Pointer.h @@ -262,6 +262,7 @@ public: case Storage::Typeid: return false; } + llvm_unreachable("Unknown clang::interp::Storage enum"); } /// Checks if the pointer is live. 
bool isLive() const { diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 55b93e1..2ce4419 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -1024,6 +1024,26 @@ OMPPartialClause *OMPPartialClause::CreateEmpty(const ASTContext &C) { return new (C) OMPPartialClause(); } +OMPLoopRangeClause * +OMPLoopRangeClause::Create(const ASTContext &C, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation FirstLoc, + SourceLocation CountLoc, SourceLocation EndLoc, + Expr *First, Expr *Count) { + OMPLoopRangeClause *Clause = CreateEmpty(C); + Clause->setLocStart(StartLoc); + Clause->setLParenLoc(LParenLoc); + Clause->setFirstLoc(FirstLoc); + Clause->setCountLoc(CountLoc); + Clause->setLocEnd(EndLoc); + Clause->setFirst(First); + Clause->setCount(Count); + return Clause; +} + +OMPLoopRangeClause *OMPLoopRangeClause::CreateEmpty(const ASTContext &C) { + return new (C) OMPLoopRangeClause(); +} + OMPAllocateClause *OMPAllocateClause::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, Expr *Allocator, Expr *Alignment, SourceLocation ColonLoc, @@ -1964,6 +1984,21 @@ void OMPClausePrinter::VisitOMPPartialClause(OMPPartialClause *Node) { } } +void OMPClausePrinter::VisitOMPLoopRangeClause(OMPLoopRangeClause *Node) { + OS << "looprange"; + + Expr *First = Node->getFirst(); + Expr *Count = Node->getCount(); + + if (First && Count) { + OS << "("; + First->printPretty(OS, nullptr, Policy, 0); + OS << ","; + Count->printPretty(OS, nullptr, Policy, 0); + OS << ")"; + } +} + void OMPClausePrinter::VisitOMPAllocatorClause(OMPAllocatorClause *Node) { OS << "allocator("; Node->getAllocator()->printPretty(OS, nullptr, Policy, 0); diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index 1f6586f..a5b0cd3 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -125,13 +125,12 @@ OMPLoopBasedDirective::tryToFindNextInnerLoop(Stmt *CurStmt, bool OMPLoopBasedDirective::doForAllLoops( Stmt *CurStmt, bool TryImperfectlyNestedLoops, unsigned NumLoops, llvm::function_ref<bool(unsigned, Stmt *)> Callback, - llvm::function_ref<void(OMPCanonicalLoopNestTransformationDirective *)> + llvm::function_ref<void(OMPLoopTransformationDirective *)> OnTransformationCallback) { CurStmt = CurStmt->IgnoreContainers(); for (unsigned Cnt = 0; Cnt < NumLoops; ++Cnt) { while (true) { - auto *Dir = - dyn_cast<OMPCanonicalLoopNestTransformationDirective>(CurStmt); + auto *Dir = dyn_cast<OMPLoopTransformationDirective>(CurStmt); if (!Dir) break; @@ -371,6 +370,22 @@ OMPForDirective *OMPForDirective::Create( return Dir; } +Stmt *OMPLoopTransformationDirective::getTransformedStmt() const { + if (auto *D = dyn_cast<OMPCanonicalLoopNestTransformationDirective>(S)) + return D->getTransformedStmt(); + if (auto *D = dyn_cast<OMPCanonicalLoopSequenceTransformationDirective>(S)) + return D->getTransformedStmt(); + llvm_unreachable("unexpected object type"); +} + +Stmt *OMPLoopTransformationDirective::getPreInits() const { + if (auto *D = dyn_cast<OMPCanonicalLoopNestTransformationDirective>(S)) + return D->getPreInits(); + if (auto *D = dyn_cast<OMPCanonicalLoopSequenceTransformationDirective>(S)) + return D->getPreInits(); + llvm_unreachable("unexpected object type"); +} + Stmt *OMPCanonicalLoopNestTransformationDirective::getTransformedStmt() const { switch (getStmtClass()) { #define STMT(CLASS, PARENT) @@ -380,7 +395,7 @@ Stmt *OMPCanonicalLoopNestTransformationDirective::getTransformedStmt() const { return 
static_cast<const CLASS *>(this)->getTransformedStmt(); #include "clang/AST/StmtNodes.inc" default: - llvm_unreachable("Not a loop transformation"); + llvm_unreachable("Not a loop transformation for canonical loop nests"); } } @@ -393,7 +408,34 @@ Stmt *OMPCanonicalLoopNestTransformationDirective::getPreInits() const { return static_cast<const CLASS *>(this)->getPreInits(); #include "clang/AST/StmtNodes.inc" default: - llvm_unreachable("Not a loop transformation"); + llvm_unreachable("Not a loop transformation for canonical loop nests"); + } +} + +Stmt * +OMPCanonicalLoopSequenceTransformationDirective::getTransformedStmt() const { + switch (getStmtClass()) { +#define STMT(CLASS, PARENT) +#define ABSTRACT_STMT(CLASS) +#define OMPCANONICALLOOPSEQUENCETRANSFORMATIONDIRECTIVE(CLASS, PARENT) \ + case Stmt::CLASS##Class: \ + return static_cast<const CLASS *>(this)->getTransformedStmt(); +#include "clang/AST/StmtNodes.inc" + default: + llvm_unreachable("Not a loop transformation for canonical loop sequences"); + } +} + +Stmt *OMPCanonicalLoopSequenceTransformationDirective::getPreInits() const { + switch (getStmtClass()) { +#define STMT(CLASS, PARENT) +#define ABSTRACT_STMT(CLASS) +#define OMPCANONICALLOOPSEQUENCETRANSFORMATIONDIRECTIVE(CLASS, PARENT) \ + case Stmt::CLASS##Class: \ + return static_cast<const CLASS *>(this)->getPreInits(); +#include "clang/AST/StmtNodes.inc" + default: + llvm_unreachable("Not a loop transformation for canonical loop sequences"); } } @@ -510,6 +552,27 @@ OMPInterchangeDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses, SourceLocation(), SourceLocation(), NumLoops); } +OMPFuseDirective *OMPFuseDirective::Create( + const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + ArrayRef<OMPClause *> Clauses, unsigned NumGeneratedTopLevelLoops, + Stmt *AssociatedStmt, Stmt *TransformedStmt, Stmt *PreInits) { + + OMPFuseDirective *Dir = createDirective<OMPFuseDirective>( + C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc); + Dir->setTransformedStmt(TransformedStmt); + Dir->setPreInits(PreInits); + Dir->setNumGeneratedTopLevelLoops(NumGeneratedTopLevelLoops); + return Dir; +} + +OMPFuseDirective *OMPFuseDirective::CreateEmpty(const ASTContext &C, + unsigned NumClauses) { + OMPFuseDirective *Dir = createEmptyDirective<OMPFuseDirective>( + C, NumClauses, /*HasAssociatedStmt=*/true, TransformedStmtOffset + 1, + SourceLocation(), SourceLocation()); + return Dir; +} + OMPForSimdDirective * OMPForSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index 2c9c358..586c300 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -795,6 +795,11 @@ void StmtPrinter::VisitOMPInterchangeDirective(OMPInterchangeDirective *Node) { PrintOMPExecutableDirective(Node); } +void StmtPrinter::VisitOMPFuseDirective(OMPFuseDirective *Node) { + Indent() << "#pragma omp fuse"; + PrintOMPExecutableDirective(Node); +} + void StmtPrinter::VisitOMPForDirective(OMPForDirective *Node) { Indent() << "#pragma omp for"; PrintOMPExecutableDirective(Node); diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index 8b3af94..589a156 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -510,6 +510,13 @@ void OMPClauseProfiler::VisitOMPPartialClause(const OMPPartialClause *C) { Profiler->VisitExpr(Factor); } +void 
OMPClauseProfiler::VisitOMPLoopRangeClause(const OMPLoopRangeClause *C) { + if (const Expr *First = C->getFirst()) + Profiler->VisitExpr(First); + if (const Expr *Count = C->getCount()) + Profiler->VisitExpr(Count); +} + void OMPClauseProfiler::VisitOMPAllocatorClause(const OMPAllocatorClause *C) { if (C->getAllocator()) Profiler->VisitStmt(C->getAllocator()); @@ -1025,6 +1032,15 @@ void StmtProfiler::VisitOMPInterchangeDirective( VisitOMPCanonicalLoopNestTransformationDirective(S); } +void StmtProfiler::VisitOMPCanonicalLoopSequenceTransformationDirective( + const OMPCanonicalLoopSequenceTransformationDirective *S) { + VisitOMPExecutableDirective(S); +} + +void StmtProfiler::VisitOMPFuseDirective(const OMPFuseDirective *S) { + VisitOMPCanonicalLoopSequenceTransformationDirective(S); +} + void StmtProfiler::VisitOMPForDirective(const OMPForDirective *S) { VisitOMPLoopDirective(S); } diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 387026e..64b2bff 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -282,6 +282,7 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str, case OMPC_affinity: case OMPC_when: case OMPC_append_args: + case OMPC_looprange: break; default: break; @@ -627,6 +628,7 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind, case OMPC_affinity: case OMPC_when: case OMPC_append_args: + case OMPC_looprange: break; default: break; @@ -755,9 +757,14 @@ bool clang::isOpenMPCanonicalLoopNestTransformationDirective( DKind == OMPD_interchange || DKind == OMPD_stripe; } +bool clang::isOpenMPCanonicalLoopSequenceTransformationDirective( + OpenMPDirectiveKind DKind) { + return DKind == OMPD_fuse; +} + bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) { - // FIXME: There will be more cases when we implement 'fuse'. - return isOpenMPCanonicalLoopNestTransformationDirective(DKind); + return isOpenMPCanonicalLoopNestTransformationDirective(DKind) || + isOpenMPCanonicalLoopSequenceTransformationDirective(DKind); } bool clang::isOpenMPCombinedParallelADirective(OpenMPDirectiveKind DKind) { diff --git a/clang/lib/CIR/CodeGen/CIRGenClass.cpp b/clang/lib/CIR/CodeGen/CIRGenClass.cpp index cb8fe6c..9d12a13 100644 --- a/clang/lib/CIR/CodeGen/CIRGenClass.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenClass.cpp @@ -951,28 +951,37 @@ Address CIRGenFunction::getAddressOfBaseClass( bool nullCheckValue, SourceLocation loc) { assert(!path.empty() && "Base path should not be empty!"); + CastExpr::path_const_iterator start = path.begin(); + const CXXRecordDecl *vBase = nullptr; + if ((*path.begin())->isVirtual()) { - // The implementation here is actually complete, but let's flag this - // as an error until the rest of the virtual base class support is in place. - cgm.errorNYI(loc, "getAddrOfBaseClass: virtual base"); - return Address::invalid(); + vBase = (*start)->getType()->castAsCXXRecordDecl(); + ++start; } // Compute the static offset of the ultimate destination within its // allocating subobject (the virtual base, if there is one, or else // the "complete" object that we see). - CharUnits nonVirtualOffset = - cgm.computeNonVirtualBaseClassOffset(derived, path); + CharUnits nonVirtualOffset = cgm.computeNonVirtualBaseClassOffset( + vBase ? vBase : derived, {start, path.end()}); + + // If there's a virtual step, we can sometimes "devirtualize" it. + // For now, that's limited to when the derived type is final. 
+ // TODO: "devirtualize" this for accesses to known-complete objects. + if (vBase && derived->hasAttr<FinalAttr>()) { + const ASTRecordLayout &layout = getContext().getASTRecordLayout(derived); + CharUnits vBaseOffset = layout.getVBaseClassOffset(vBase); + nonVirtualOffset += vBaseOffset; + vBase = nullptr; // we no longer have a virtual step + } // Get the base pointer type. mlir::Type baseValueTy = convertType((path.end()[-1])->getType()); assert(!cir::MissingFeatures::addressSpace()); - // The if statement here is redundant now, but it will be needed when we add - // support for virtual base classes. // If there is no virtual base, use cir.base_class_addr. It takes care of // the adjustment and the null pointer check. - if (nonVirtualOffset.isZero()) { + if (nonVirtualOffset.isZero() && !vBase) { assert(!cir::MissingFeatures::sanitizers()); return builder.createBaseClassAddr(getLoc(loc), value, baseValueTy, 0, /*assumeNotNull=*/true); @@ -980,10 +989,17 @@ Address CIRGenFunction::getAddressOfBaseClass( assert(!cir::MissingFeatures::sanitizers()); - // Apply the offset - value = builder.createBaseClassAddr(getLoc(loc), value, baseValueTy, - nonVirtualOffset.getQuantity(), - /*assumeNotNull=*/true); + // Compute the virtual offset. + mlir::Value virtualOffset = nullptr; + if (vBase) { + virtualOffset = cgm.getCXXABI().getVirtualBaseClassOffset( + getLoc(loc), *this, value, derived, vBase); + } + + // Apply both offsets. + value = applyNonVirtualAndVirtualOffset( + getLoc(loc), *this, value, nonVirtualOffset, virtualOffset, derived, + vBase, baseValueTy, not nullCheckValue); // Cast to the destination type. value = value.withElementType(builder, baseValueTy); diff --git a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp index 1f7e3dd..83208bf 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp @@ -210,6 +210,60 @@ RValue CIRGenFunction::emitCXXMemberOrOperatorCall( return emitCall(fnInfo, callee, returnValue, args, nullptr, loc); } +namespace { +/// The parameters to pass to a usual operator delete. +struct UsualDeleteParams { + TypeAwareAllocationMode typeAwareDelete = TypeAwareAllocationMode::No; + bool destroyingDelete = false; + bool size = false; + AlignedAllocationMode alignment = AlignedAllocationMode::No; +}; +} // namespace + +// FIXME(cir): this should be shared with LLVM codegen +static UsualDeleteParams getUsualDeleteParams(const FunctionDecl *fd) { + UsualDeleteParams params; + + const FunctionProtoType *fpt = fd->getType()->castAs<FunctionProtoType>(); + auto ai = fpt->param_type_begin(), ae = fpt->param_type_end(); + + if (fd->isTypeAwareOperatorNewOrDelete()) { + params.typeAwareDelete = TypeAwareAllocationMode::Yes; + assert(ai != ae); + ++ai; + } + + // The first argument after the type-identity parameter (if any) is + // always a void* (or C* for a destroying operator delete for class + // type C). + ++ai; + + // The next parameter may be a std::destroying_delete_t. + if (fd->isDestroyingOperatorDelete()) { + params.destroyingDelete = true; + assert(ai != ae); + ++ai; + } + + // Figure out what other parameters we should be implicitly passing. 
+ if (ai != ae && (*ai)->isIntegerType()) { + params.size = true; + ++ai; + } else { + assert(!isTypeAwareAllocation(params.typeAwareDelete)); + } + + if (ai != ae && (*ai)->isAlignValT()) { + params.alignment = AlignedAllocationMode::Yes; + ++ai; + } else { + assert(!isTypeAwareAllocation(params.typeAwareDelete)); + } + + assert(ai == ae && "unexpected usual deallocation function parameter"); + return params; +} + static mlir::Value emitCXXNewAllocSize(CIRGenFunction &cgf, const CXXNewExpr *e, unsigned minElements, mlir::Value &numElements, @@ -332,6 +386,117 @@ static RValue emitNewDeleteCall(CIRGenFunction &cgf, return rv; } +namespace { +/// Calls the given 'operator delete' on a single object. +struct CallObjectDelete final : EHScopeStack::Cleanup { + mlir::Value ptr; + const FunctionDecl *operatorDelete; + QualType elementType; + + CallObjectDelete(mlir::Value ptr, const FunctionDecl *operatorDelete, + QualType elementType) + : ptr(ptr), operatorDelete(operatorDelete), elementType(elementType) {} + + void emit(CIRGenFunction &cgf) override { + cgf.emitDeleteCall(operatorDelete, ptr, elementType); + } + + // This is a placeholder until EHCleanupScope is implemented. + size_t getSize() const override { + assert(!cir::MissingFeatures::ehCleanupScope()); + return sizeof(CallObjectDelete); + } +}; +} // namespace + +/// Emit the code for deleting a single object. +static void emitObjectDelete(CIRGenFunction &cgf, const CXXDeleteExpr *de, + Address ptr, QualType elementType) { + // C++11 [expr.delete]p3: + // If the static type of the object to be deleted is different from its + // dynamic type, the static type shall be a base class of the dynamic type + // of the object to be deleted and the static type shall have a virtual + // destructor or the behavior is undefined. + assert(!cir::MissingFeatures::emitTypeCheck()); + + const FunctionDecl *operatorDelete = de->getOperatorDelete(); + assert(!operatorDelete->isDestroyingOperatorDelete()); + + // Find the destructor for the type, if applicable. If the + // destructor is virtual, we'll just emit the vcall and return. + const CXXDestructorDecl *dtor = nullptr; + if (const auto *rd = elementType->getAsCXXRecordDecl()) { + if (rd->hasDefinition() && !rd->hasTrivialDestructor()) { + dtor = rd->getDestructor(); + + if (dtor->isVirtual()) { + cgf.cgm.errorNYI(de->getSourceRange(), + "emitObjectDelete: virtual destructor"); + } + } + } + + // Make sure that we call delete even if the dtor throws. + // This doesn't have to a conditional cleanup because we're going + // to pop it off in a second. + cgf.ehStack.pushCleanup<CallObjectDelete>( + NormalAndEHCleanup, ptr.getPointer(), operatorDelete, elementType); + + if (dtor) { + cgf.emitCXXDestructorCall(dtor, Dtor_Complete, + /*ForVirtualBase=*/false, + /*Delegating=*/false, ptr, elementType); + } else if (elementType.getObjCLifetime()) { + assert(!cir::MissingFeatures::objCLifetime()); + cgf.cgm.errorNYI(de->getSourceRange(), "emitObjectDelete: ObjCLifetime"); + } + + // In traditional LLVM codegen null checks are emitted to save a delete call. + // In CIR we optimize for size by default, the null check should be added into + // this function callers. + assert(!cir::MissingFeatures::emitNullCheckForDeleteCalls()); + + cgf.popCleanupBlock(); +} + +void CIRGenFunction::emitCXXDeleteExpr(const CXXDeleteExpr *e) { + const Expr *arg = e->getArgument(); + Address ptr = emitPointerWithAlignment(arg); + + // Null check the pointer. 
+ // + // We could avoid this null check if we can determine that the object + // destruction is trivial and doesn't require an array cookie; we can + // unconditionally perform the operator delete call in that case. For now, we + // assume that deleted pointers are null rarely enough that it's better to + // keep the branch. This might be worth revisiting for a -O0 code size win. + // + // CIR note: emit the code size friendly by default for now, such as mentioned + // in `emitObjectDelete`. + assert(!cir::MissingFeatures::emitNullCheckForDeleteCalls()); + QualType deleteTy = e->getDestroyedType(); + + // A destroying operator delete overrides the entire operation of the + // delete expression. + if (e->getOperatorDelete()->isDestroyingOperatorDelete()) { + cgm.errorNYI(e->getSourceRange(), + "emitCXXDeleteExpr: destroying operator delete"); + return; + } + + // We might be deleting a pointer to array. + deleteTy = getContext().getBaseElementType(deleteTy); + ptr = ptr.withElementType(builder, convertTypeForMem(deleteTy)); + + if (e->isArrayForm()) { + assert(!cir::MissingFeatures::deleteArray()); + cgm.errorNYI(e->getSourceRange(), "emitCXXDeleteExpr: array delete"); + return; + } else { + emitObjectDelete(*this, e, ptr, deleteTy); + } +} + mlir::Value CIRGenFunction::emitCXXNewExpr(const CXXNewExpr *e) { // The element type being allocated. QualType allocType = getContext().getBaseElementType(e->getAllocatedType()); @@ -443,3 +608,53 @@ mlir::Value CIRGenFunction::emitCXXNewExpr(const CXXNewExpr *e) { allocSizeWithoutCookie); return result.getPointer(); } + +void CIRGenFunction::emitDeleteCall(const FunctionDecl *deleteFD, + mlir::Value ptr, QualType deleteTy) { + assert(!cir::MissingFeatures::deleteArray()); + + const auto *deleteFTy = deleteFD->getType()->castAs<FunctionProtoType>(); + CallArgList deleteArgs; + + UsualDeleteParams params = getUsualDeleteParams(deleteFD); + auto paramTypeIt = deleteFTy->param_type_begin(); + + // Pass std::type_identity tag if present + if (isTypeAwareAllocation(params.typeAwareDelete)) + cgm.errorNYI(deleteFD->getSourceRange(), + "emitDeleteCall: type aware delete"); + + // Pass the pointer itself. + QualType argTy = *paramTypeIt++; + mlir::Value deletePtr = + builder.createBitcast(ptr.getLoc(), ptr, convertType(argTy)); + deleteArgs.add(RValue::get(deletePtr), argTy); + + // Pass the std::destroying_delete tag if present. + if (params.destroyingDelete) + cgm.errorNYI(deleteFD->getSourceRange(), + "emitDeleteCall: destroying delete"); + + // Pass the size if the delete function has a size_t parameter. + if (params.size) { + QualType sizeType = *paramTypeIt++; + CharUnits deleteTypeSize = getContext().getTypeSizeInChars(deleteTy); + assert(mlir::isa<cir::IntType>(convertType(sizeType)) && + "expected cir::IntType"); + cir::ConstantOp size = builder.getConstInt( + *currSrcLoc, convertType(sizeType), deleteTypeSize.getQuantity()); + + deleteArgs.add(RValue::get(size), sizeType); + } + + // Pass the alignment if the delete function has an align_val_t parameter. + if (isAlignedAllocation(params.alignment)) + cgm.errorNYI(deleteFD->getSourceRange(), + "emitDeleteCall: aligned allocation"); + + assert(paramTypeIt == deleteFTy->param_type_end() && + "unknown parameter to usual delete function"); + + // Emit the call to delete. 
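A source-level view of what this entry point now covers, and what still takes the errorNYI paths above (hypothetical types, for illustration only):

  struct S { ~S(); };       // non-virtual, non-trivial destructor
  void f(S *p, S *a) {
    delete p;    // handled: emits the destructor call, then the usual operator delete
    delete[] a;  // array form: still reported as NYI ("array delete")
  }
  // Virtual destructors and destroying operator delete likewise remain NYI for now.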
+ emitNewDeleteCall(*this, deleteFD, deleteFTy, deleteArgs); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index bd09d78..f4bbced 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -676,6 +676,10 @@ public: mlir::Value VisitRealImag(const UnaryOperator *e, QualType promotionType = QualType()); + mlir::Value VisitUnaryExtension(const UnaryOperator *e) { + return Visit(e->getSubExpr()); + } + mlir::Value VisitCXXDefaultInitExpr(CXXDefaultInitExpr *die) { CIRGenFunction::CXXDefaultInitExprScope scope(cgf, die); return Visit(die->getExpr()); @@ -687,6 +691,10 @@ public: mlir::Value VisitCXXNewExpr(const CXXNewExpr *e) { return cgf.emitCXXNewExpr(e); } + mlir::Value VisitCXXDeleteExpr(const CXXDeleteExpr *e) { + cgf.emitCXXDeleteExpr(e); + return {}; + } mlir::Value VisitCXXThrowExpr(const CXXThrowExpr *e) { cgf.emitCXXThrowExpr(e); @@ -1274,9 +1282,6 @@ mlir::Value ScalarExprEmitter::emitPromoted(const Expr *e, } else if (const auto *uo = dyn_cast<UnaryOperator>(e)) { switch (uo->getOpcode()) { case UO_Imag: - cgf.cgm.errorNYI(e->getSourceRange(), - "ScalarExprEmitter::emitPromoted unary imag"); - return {}; case UO_Real: return VisitRealImag(uo, promotionType); case UO_Minus: diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index 166435f..ef07db3 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -1197,6 +1197,8 @@ public: bool delegating, Address thisAddr, CallArgList &args, clang::SourceLocation loc); + void emitCXXDeleteExpr(const CXXDeleteExpr *e); + void emitCXXDestructorCall(const CXXDestructorDecl *dd, CXXDtorType type, bool forVirtualBase, bool delegating, Address thisAddr, QualType thisTy); @@ -1244,6 +1246,9 @@ public: void emitDelegatingCXXConstructorCall(const CXXConstructorDecl *ctor, const FunctionArgList &args); + void emitDeleteCall(const FunctionDecl *deleteFD, mlir::Value ptr, + QualType deleteTy); + mlir::LogicalResult emitDoStmt(const clang::DoStmt &s); /// Emit an expression as an initializer for an object (variable, field, etc.) diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index eef23a0..c977ff9 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -119,6 +119,19 @@ CIRGenModule::CIRGenModule(mlir::MLIRContext &mlirContext, cir::OptInfoAttr::get(&mlirContext, cgo.OptimizationLevel, cgo.OptimizeSize)); + // Set the module name to be the name of the main file. TranslationUnitDecl + // often contains invalid source locations and isn't a reliable source for the + // module location. 
+ FileID mainFileId = astContext.getSourceManager().getMainFileID(); + const FileEntry &mainFile = + *astContext.getSourceManager().getFileEntryForID(mainFileId); + StringRef path = mainFile.tryGetRealPathName(); + if (!path.empty()) { + theModule.setSymName(path); + theModule->setLoc(mlir::FileLineColLoc::get(&mlirContext, path, + /*line=*/0, + /*column=*/0)); + } } CIRGenModule::~CIRGenModule() = default; diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp index e842892..644c383 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp @@ -216,6 +216,7 @@ mlir::LogicalResult CIRGenFunction::emitStmt(const Stmt *s, case Stmt::OMPSimdDirectiveClass: case Stmt::OMPTileDirectiveClass: case Stmt::OMPUnrollDirectiveClass: + case Stmt::OMPFuseDirectiveClass: case Stmt::OMPForDirectiveClass: case Stmt::OMPForSimdDirectiveClass: case Stmt::OMPSectionsDirectiveClass: diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index 58ef500..fb87036 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -1355,9 +1355,11 @@ mlir::LogicalResult cir::GlobalOp::verify() { return success(); } -void cir::GlobalOp::build(OpBuilder &odsBuilder, OperationState &odsState, - llvm::StringRef sym_name, mlir::Type sym_type, - bool isConstant, cir::GlobalLinkageKind linkage) { +void cir::GlobalOp::build( + OpBuilder &odsBuilder, OperationState &odsState, llvm::StringRef sym_name, + mlir::Type sym_type, bool isConstant, cir::GlobalLinkageKind linkage, + function_ref<void(OpBuilder &, Location)> ctorBuilder, + function_ref<void(OpBuilder &, Location)> dtorBuilder) { odsState.addAttribute(getSymNameAttrName(odsState.name), odsBuilder.getStringAttr(sym_name)); odsState.addAttribute(getSymTypeAttrName(odsState.name), @@ -1370,26 +1372,88 @@ void cir::GlobalOp::build(OpBuilder &odsBuilder, OperationState &odsState, cir::GlobalLinkageKindAttr::get(odsBuilder.getContext(), linkage); odsState.addAttribute(getLinkageAttrName(odsState.name), linkageAttr); + Region *ctorRegion = odsState.addRegion(); + if (ctorBuilder) { + odsBuilder.createBlock(ctorRegion); + ctorBuilder(odsBuilder, odsState.location); + } + + Region *dtorRegion = odsState.addRegion(); + if (dtorBuilder) { + odsBuilder.createBlock(dtorRegion); + dtorBuilder(odsBuilder, odsState.location); + } + odsState.addAttribute(getGlobalVisibilityAttrName(odsState.name), cir::VisibilityAttr::get(odsBuilder.getContext())); } +/// Return the regions that may be selected during the flow of control: on +/// entry, the parent operation may branch into the non-empty `ctor` and `dtor` +/// regions, and both regions always branch back to the parent operation. +void cir::GlobalOp::getSuccessorRegions( + mlir::RegionBranchPoint point, SmallVectorImpl<RegionSuccessor> &regions) { + // The `ctor` and `dtor` regions always branch back to the parent operation. + if (!point.isParent()) { + regions.push_back(RegionSuccessor()); + return; + } + + // Don't consider the ctor region if it is empty. + Region *ctorRegion = &this->getCtorRegion(); + if (ctorRegion->empty()) + ctorRegion = nullptr; + + // Don't consider the dtor region if it is empty. + Region *dtorRegion = &this->getDtorRegion(); + if (dtorRegion->empty()) + dtorRegion = nullptr; + + // Report the remaining non-empty regions as possible successors.
+ if (ctorRegion) + regions.push_back(RegionSuccessor(ctorRegion)); + if (dtorRegion) + regions.push_back(RegionSuccessor(dtorRegion)); +} + static void printGlobalOpTypeAndInitialValue(OpAsmPrinter &p, cir::GlobalOp op, - TypeAttr type, - Attribute initAttr) { + TypeAttr type, Attribute initAttr, + mlir::Region &ctorRegion, + mlir::Region &dtorRegion) { + auto printType = [&]() { p << ": " << type; }; if (!op.isDeclaration()) { p << "= "; - // This also prints the type... - if (initAttr) - printConstant(p, initAttr); + if (!ctorRegion.empty()) { + p << "ctor "; + printType(); + p << " "; + p.printRegion(ctorRegion, + /*printEntryBlockArgs=*/false, + /*printBlockTerminators=*/false); + } else { + // This also prints the type... + if (initAttr) + printConstant(p, initAttr); + } + + if (!dtorRegion.empty()) { + p << " dtor "; + p.printRegion(dtorRegion, + /*printEntryBlockArgs=*/false, + /*printBlockTerminators=*/false); + } } else { - p << ": " << type; + printType(); } } -static ParseResult -parseGlobalOpTypeAndInitialValue(OpAsmParser &parser, TypeAttr &typeAttr, - Attribute &initialValueAttr) { +static ParseResult parseGlobalOpTypeAndInitialValue(OpAsmParser &parser, + TypeAttr &typeAttr, + Attribute &initialValueAttr, + mlir::Region &ctorRegion, + mlir::Region &dtorRegion) { mlir::Type opTy; if (parser.parseOptionalEqual().failed()) { // Absence of equal means a declaration, so we need to parse the type. @@ -1397,16 +1461,38 @@ parseGlobalOpTypeAndInitialValue(OpAsmParser &parser, TypeAttr &typeAttr, if (parser.parseColonType(opTy)) return failure(); } else { - // Parse constant with initializer, examples: - // cir.global @y = #cir.fp<1.250000e+00> : !cir.double - // cir.global @rgb = #cir.const_array<[...] : !cir.array<i8 x 3>> - if (parseConstantValue(parser, initialValueAttr).failed()) - return failure(); + // Parse contructor, example: + // cir.global @rgb = ctor : type { ... } + if (!parser.parseOptionalKeyword("ctor")) { + if (parser.parseColonType(opTy)) + return failure(); + auto parseLoc = parser.getCurrentLocation(); + if (parser.parseRegion(ctorRegion, /*arguments=*/{}, /*argTypes=*/{})) + return failure(); + if (ensureRegionTerm(parser, ctorRegion, parseLoc).failed()) + return failure(); + } else { + // Parse constant with initializer, examples: + // cir.global @y = 3.400000e+00 : f32 + // cir.global @rgb = #cir.const_array<[...] : !cir.array<i8 x 3>> + if (parseConstantValue(parser, initialValueAttr).failed()) + return failure(); + + assert(mlir::isa<mlir::TypedAttr>(initialValueAttr) && + "Non-typed attrs shouldn't appear here."); + auto typedAttr = mlir::cast<mlir::TypedAttr>(initialValueAttr); + opTy = typedAttr.getType(); + } - assert(mlir::isa<mlir::TypedAttr>(initialValueAttr) && - "Non-typed attrs shouldn't appear here."); - auto typedAttr = mlir::cast<mlir::TypedAttr>(initialValueAttr); - opTy = typedAttr.getType(); + // Parse destructor, example: + // dtor { ... 
} + if (!parser.parseOptionalKeyword("dtor")) { + auto parseLoc = parser.getCurrentLocation(); + if (parser.parseRegion(dtorRegion, /*arguments=*/{}, /*argTypes=*/{})) + return failure(); + if (ensureRegionTerm(parser, dtorRegion, parseLoc).failed()) + return failure(); + } } typeAttr = TypeAttr::get(opTy); diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 57db20f7..64f1917 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1090,8 +1090,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline( if (std::optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts, LangOpts)) PB.registerPipelineStartEPCallback( - [Options](ModulePassManager &MPM, OptimizationLevel Level) { - MPM.addPass(GCOVProfilerPass(*Options)); + [this, Options](ModulePassManager &MPM, OptimizationLevel Level) { + MPM.addPass( + GCOVProfilerPass(*Options, CI.getVirtualFileSystemPtr())); }); if (std::optional<InstrProfOptions> Options = getInstrProfOptions(CodeGenOpts, LangOpts)) diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 12c7d48..fee6bc0 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -26,6 +26,7 @@ #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/Expr.h" +#include "clang/AST/LambdaCapture.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/VTableBuilder.h" @@ -1903,46 +1904,61 @@ CGDebugInfo::createInlinedSubprogram(StringRef FuncName, return SP; } +llvm::StringRef +CGDebugInfo::GetLambdaCaptureName(const LambdaCapture &Capture) { + if (Capture.capturesThis()) + return CGM.getCodeGenOpts().EmitCodeView ? "__this" : "this"; + + assert(Capture.capturesVariable()); + + const ValueDecl *CaptureDecl = Capture.getCapturedVar(); + assert(CaptureDecl && "Expected valid decl for captured variable."); + + return CaptureDecl->getName(); +} + void CGDebugInfo::CollectRecordLambdaFields( const CXXRecordDecl *CXXDecl, SmallVectorImpl<llvm::Metadata *> &elements, llvm::DIType *RecordTy) { // For C++11 Lambdas a Field will be the same as a Capture, but the Capture // has the name and the location of the variable so we should iterate over // both concurrently. 
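Putting the ctor/dtor printer and parser above together, a global with dynamic initialization round-trips in roughly this textual form (hand-written sketch; the exact type alias, linkage, and region contents depend on the input):

  cir.global external @g = ctor : !s32i {
    // initialization code for @g
  } dtor {
    // finalization code for @g
  }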
- const ASTRecordLayout &layout = CGM.getContext().getASTRecordLayout(CXXDecl); RecordDecl::field_iterator Field = CXXDecl->field_begin(); unsigned fieldno = 0; for (CXXRecordDecl::capture_const_iterator I = CXXDecl->captures_begin(), E = CXXDecl->captures_end(); I != E; ++I, ++Field, ++fieldno) { - const LambdaCapture &C = *I; - if (C.capturesVariable()) { - SourceLocation Loc = C.getLocation(); - assert(!Field->isBitField() && "lambdas don't have bitfield members!"); - ValueDecl *V = C.getCapturedVar(); - StringRef VName = V->getName(); - llvm::DIFile *VUnit = getOrCreateFile(Loc); - auto Align = getDeclAlignIfRequired(V, CGM.getContext()); - llvm::DIType *FieldType = createFieldType( - VName, Field->getType(), Loc, Field->getAccess(), - layout.getFieldOffset(fieldno), Align, VUnit, RecordTy, CXXDecl); - elements.push_back(FieldType); - } else if (C.capturesThis()) { + const LambdaCapture &Capture = *I; + const uint64_t FieldOffset = + CGM.getContext().getASTRecordLayout(CXXDecl).getFieldOffset(fieldno); + + assert(!Field->isBitField() && "lambdas don't have bitfield members!"); + + SourceLocation Loc; + uint32_t Align = 0; + + if (Capture.capturesThis()) { // TODO: Need to handle 'this' in some way by probably renaming the // this of the lambda class and having a field member of 'this' or // by using AT_object_pointer for the function and having that be // used as 'this' for semantic references. - FieldDecl *f = *Field; - llvm::DIFile *VUnit = getOrCreateFile(f->getLocation()); - QualType type = f->getType(); - StringRef ThisName = - CGM.getCodeGenOpts().EmitCodeView ? "__this" : "this"; - llvm::DIType *fieldType = createFieldType( - ThisName, type, f->getLocation(), f->getAccess(), - layout.getFieldOffset(fieldno), VUnit, RecordTy, CXXDecl); - - elements.push_back(fieldType); + Loc = Field->getLocation(); + } else if (Capture.capturesVariable()) { + Loc = Capture.getLocation(); + + const ValueDecl *CaptureDecl = Capture.getCapturedVar(); + assert(CaptureDecl && "Expected valid decl for captured variable."); + + Align = getDeclAlignIfRequired(CaptureDecl, CGM.getContext()); + } else { + continue; } + + llvm::DIFile *VUnit = getOrCreateFile(Loc); + + elements.push_back(createFieldType( + GetLambdaCaptureName(Capture), Field->getType(), Loc, + Field->getAccess(), FieldOffset, Align, VUnit, RecordTy, CXXDecl)); } } diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h index f860773..78c3eb9 100644 --- a/clang/lib/CodeGen/CGDebugInfo.h +++ b/clang/lib/CodeGen/CGDebugInfo.h @@ -397,6 +397,7 @@ private: void CollectRecordFields(const RecordDecl *Decl, llvm::DIFile *F, SmallVectorImpl<llvm::Metadata *> &E, llvm::DICompositeType *RecordTy); + llvm::StringRef GetLambdaCaptureName(const LambdaCapture &Capture); /// If the C++ class has vtable info then insert appropriate debug /// info entry in EltTys vector. 
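The net effect of the refactor is that all capture kinds funnel through a single createFieldType call, with the member name chosen by GetLambdaCaptureName; for a closure like the one below (illustrative only, non-CodeView target assumed) the emitted field names would be "this", "local", and "m":

  struct W {
    int n = 0;
    void run() {
      int local = 1;
      auto l = [this, local, m = 2]() { return n + local + m; };
      (void)l; // debug info for decltype(l) describes fields "this", "local", "m"
    }
  };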
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index e62bc76..92636f2 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -234,6 +234,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) { case Stmt::OMPInterchangeDirectiveClass: EmitOMPInterchangeDirective(cast<OMPInterchangeDirective>(*S)); break; + case Stmt::OMPFuseDirectiveClass: + EmitOMPFuseDirective(cast<OMPFuseDirective>(*S)); + break; case Stmt::OMPForDirectiveClass: EmitOMPForDirective(cast<OMPForDirective>(*S)); break; diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index ba9c7c6..efc06a2 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -201,6 +201,24 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { } else { llvm_unreachable("Unknown loop-based directive kind."); } + doEmitPreinits(PreInits); + PreCondVars.restore(CGF); + } + + void + emitPreInitStmt(CodeGenFunction &CGF, + const OMPCanonicalLoopSequenceTransformationDirective &S) { + const Stmt *PreInits; + if (const auto *Fuse = dyn_cast<OMPFuseDirective>(&S)) { + PreInits = Fuse->getPreInits(); + } else { + llvm_unreachable( + "Unknown canonical loop sequence transform directive kind."); + } + doEmitPreinits(PreInits); + } + + void doEmitPreinits(const Stmt *PreInits) { if (PreInits) { // CompoundStmts and DeclStmts are used as lists of PreInit statements and // declarations. Since declarations must be visible in the the following @@ -222,7 +240,6 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { CGF.EmitStmt(S); } } - PreCondVars.restore(CGF); } public: @@ -230,6 +247,11 @@ public: : CodeGenFunction::RunCleanupsScope(CGF) { emitPreInitStmt(CGF, S); } + OMPLoopScope(CodeGenFunction &CGF, + const OMPCanonicalLoopSequenceTransformationDirective &S) + : CodeGenFunction::RunCleanupsScope(CGF) { + emitPreInitStmt(CGF, S); + } }; class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope { @@ -1929,6 +1951,15 @@ public: CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP); CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI); } + if (const auto *Dir = + dyn_cast<OMPCanonicalLoopSequenceTransformationDirective>(S)) { + // For simplicity we reuse the loop scope similarly to what we do with + // OMPCanonicalLoopNestTransformationDirective do by being a subclass + // of OMPLoopBasedDirective. 
+ Scope = new OMPLoopScope(CGF, *Dir); + CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP); + CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI); + } } ~OMPTransformDirectiveScopeRAII() { if (!Scope) @@ -1956,8 +1987,7 @@ static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop, return; } if (SimplifiedS == NextLoop) { - if (auto *Dir = - dyn_cast<OMPCanonicalLoopNestTransformationDirective>(SimplifiedS)) + if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(SimplifiedS)) SimplifiedS = Dir->getTransformedStmt(); if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS)) SimplifiedS = CanonLoop->getLoopStmt(); @@ -2952,6 +2982,12 @@ void CodeGenFunction::EmitOMPInterchangeDirective( EmitStmt(S.getTransformedStmt()); } +void CodeGenFunction::EmitOMPFuseDirective(const OMPFuseDirective &S) { + // Emit the de-sugared statement + OMPTransformDirectiveScopeRAII FuseScope(*this, &S); + EmitStmt(S.getTransformedStmt()); +} + void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) { bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 727487b..f0565c1 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3861,6 +3861,7 @@ public: void EmitOMPUnrollDirective(const OMPUnrollDirective &S); void EmitOMPReverseDirective(const OMPReverseDirective &S); void EmitOMPInterchangeDirective(const OMPInterchangeDirective &S); + void EmitOMPFuseDirective(const OMPFuseDirective &S); void EmitOMPForDirective(const OMPForDirective &S); void EmitOMPForSimdDirective(const OMPForSimdDirective &S); void EmitOMPScopeDirective(const OMPScopeDirective &S); diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index 07cf08c..6596ec0 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -192,9 +192,17 @@ static Value *emitFPIntBuiltin(CodeGenFunction &CGF, return CGF.Builder.CreateCall(F, {Src0, Src1}); } +static inline StringRef mapScopeToSPIRV(StringRef AMDGCNScope) { + if (AMDGCNScope == "agent") + return "device"; + if (AMDGCNScope == "wavefront") + return "subgroup"; + return AMDGCNScope; +} + // For processing memory ordering and memory scope arguments of various // amdgcn builtins. -// \p Order takes a C++11 comptabile memory-ordering specifier and converts +// \p Order takes a C++11 compatible memory-ordering specifier and converts // it into LLVM's memory ordering specifier using atomic C ABI, and writes // to \p AO. \p Scope takes a const char * and converts it into AMDGCN // specific SyncScopeID and writes it to \p SSID. @@ -227,6 +235,8 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope, // Some of the atomic builtins take the scope as a string name. 
StringRef scp; if (llvm::getConstantStringInfo(Scope, scp)) { + if (getTarget().getTriple().isSPIRV()) + scp = mapScopeToSPIRV(scp); SSID = getLLVMContext().getOrInsertSyncScopeID(scp); return; } @@ -238,13 +248,19 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope, SSID = llvm::SyncScope::System; break; case 1: // __MEMORY_SCOPE_DEVICE - SSID = getLLVMContext().getOrInsertSyncScopeID("agent"); + if (getTarget().getTriple().isSPIRV()) + SSID = getLLVMContext().getOrInsertSyncScopeID("device"); + else + SSID = getLLVMContext().getOrInsertSyncScopeID("agent"); break; case 2: // __MEMORY_SCOPE_WRKGRP SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup"); break; case 3: // __MEMORY_SCOPE_WVFRNT - SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront"); + if (getTarget().getTriple().isSPIRV()) + SSID = getLLVMContext().getOrInsertSyncScopeID("subgroup"); + else + SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront"); break; case 4: // __MEMORY_SCOPE_SINGLE SSID = llvm::SyncScope::SingleThread; @@ -1510,7 +1526,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, // // The global/flat cases need to use agent scope to consistently produce // the native instruction instead of a cmpxchg expansion. - SSID = getLLVMContext().getOrInsertSyncScopeID("agent"); + if (getTarget().getTriple().isSPIRV()) + SSID = getLLVMContext().getOrInsertSyncScopeID("device"); + else + SSID = getLLVMContext().getOrInsertSyncScopeID("agent"); AO = AtomicOrdering::Monotonic; // The v2bf16 builtin uses i16 instead of a natural bfloat type. diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index f110dba..85a13357 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -6613,6 +6613,9 @@ std::string Driver::GetStdModuleManifestPath(const Compilation &C, const ToolChain &TC) const { std::string error = "<NOT PRESENT>"; + if (C.getArgs().hasArg(options::OPT_nostdlib)) + return error; + switch (TC.GetCXXStdlibType(C.getArgs())) { case ToolChain::CST_Libcxx: { auto evaluate = [&](const char *library) -> std::optional<std::string> { diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index e04b0e7..a28446a 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -55,7 +55,7 @@ namespace format { TYPE(ConflictAlternative) \ TYPE(ConflictEnd) \ TYPE(ConflictStart) \ - /* l_brace of if/for/while */ \ + /* l_brace of if/for/while/switch/catch */ \ TYPE(ControlStatementLBrace) \ TYPE(ControlStatementRBrace) \ TYPE(CppCastLParen) \ diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 67066a1..0c9c88a 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -4021,29 +4021,28 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) const { } } - if (IsCpp && - (LineIsFunctionDeclaration || - (FirstNonComment && FirstNonComment->is(TT_CtorDtorDeclName))) && - Line.endsWith(tok::semi, tok::r_brace)) { - auto *Tok = Line.Last->Previous; - while (Tok->isNot(tok::r_brace)) - Tok = Tok->Previous; - if (auto *LBrace = Tok->MatchingParen; LBrace && LBrace->is(TT_Unknown)) { - assert(LBrace->is(tok::l_brace)); - Tok->setBlockKind(BK_Block); - LBrace->setBlockKind(BK_Block); - LBrace->setFinalizedType(TT_FunctionLBrace); + if (IsCpp) { + if ((LineIsFunctionDeclaration || + (FirstNonComment && FirstNonComment->is(TT_CtorDtorDeclName))) && + Line.endsWith(tok::semi, tok::r_brace)) { + auto *Tok = 
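Since SPIR-V has no "agent" or "wavefront" sync scopes, the remapping above applies whenever these builtins are compiled for a SPIR-V triple; a small illustration using the existing fence builtin (the syncscope strings in the comments are the expected outcome, not verified output):

  void flush_device_scope() {
    // AMDGCN target: fence syncscope("agent") seq_cst
    // SPIR-V target: fence syncscope("device") seq_cst
    __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent");
  }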
Line.Last->Previous; + while (Tok->isNot(tok::r_brace)) + Tok = Tok->Previous; + if (auto *LBrace = Tok->MatchingParen; LBrace && LBrace->is(TT_Unknown)) { + assert(LBrace->is(tok::l_brace)); + Tok->setBlockKind(BK_Block); + LBrace->setBlockKind(BK_Block); + LBrace->setFinalizedType(TT_FunctionLBrace); + } } - } - if (IsCpp && SeenName && AfterLastAttribute && - mustBreakAfterAttributes(*AfterLastAttribute, Style)) { - AfterLastAttribute->MustBreakBefore = true; - if (LineIsFunctionDeclaration) - Line.ReturnTypeWrapped = true; - } + if (SeenName && AfterLastAttribute && + mustBreakAfterAttributes(*AfterLastAttribute, Style)) { + AfterLastAttribute->MustBreakBefore = true; + if (LineIsFunctionDeclaration) + Line.ReturnTypeWrapped = true; + } - if (IsCpp) { if (!LineIsFunctionDeclaration) { // Annotate */&/&& in `operator` function calls as binary operators. for (const auto *Tok = FirstNonComment; Tok; Tok = Tok->Next) { @@ -4089,6 +4088,11 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) const { } } + if (First->is(TT_ElseLBrace)) { + First->CanBreakBefore = true; + First->MustBreakBefore = true; + } + bool InFunctionDecl = Line.MightBeFunctionDecl; bool InParameterList = false; for (auto *Current = First->Next; Current; Current = Current->Next) { diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index edf0a09..877ab02 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -742,7 +742,10 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts, Builder.defineMacro("__cpp_impl_coroutine", "201902L"); Builder.defineMacro("__cpp_designated_initializers", "201707L"); Builder.defineMacro("__cpp_impl_three_way_comparison", "201907L"); - //Builder.defineMacro("__cpp_modules", "201907L"); + // Intentionally to set __cpp_modules to 1. + // See https://github.com/llvm/llvm-project/issues/71364 for details. + // Builder.defineMacro("__cpp_modules", "201907L"); + Builder.defineMacro("__cpp_modules", "1"); Builder.defineMacro("__cpp_using_enum", "201907L"); } // C++23 features. diff --git a/clang/lib/Frontend/ModuleDependencyCollector.cpp b/clang/lib/Frontend/ModuleDependencyCollector.cpp index 3b363f9..ff37065 100644 --- a/clang/lib/Frontend/ModuleDependencyCollector.cpp +++ b/clang/lib/Frontend/ModuleDependencyCollector.cpp @@ -91,10 +91,10 @@ void ModuleDependencyCollector::attachToPreprocessor(Preprocessor &PP) { std::make_unique<ModuleDependencyMMCallbacks>(*this)); } -static bool isCaseSensitivePath(StringRef Path) { +static bool isCaseSensitivePath(llvm::vfs::FileSystem &VFS, StringRef Path) { SmallString<256> TmpDest = Path, UpperDest, RealDest; // Remove component traversals, links, etc. - if (llvm::sys::fs::real_path(Path, TmpDest)) + if (VFS.getRealPath(Path, TmpDest)) return true; // Current default value in vfs.yaml Path = TmpDest; @@ -104,7 +104,7 @@ static bool isCaseSensitivePath(StringRef Path) { // already expects when sensitivity isn't setup. for (auto &C : Path) UpperDest.push_back(toUppercase(C)); - if (!llvm::sys::fs::real_path(UpperDest, RealDest) && Path == RealDest) + if (!VFS.getRealPath(UpperDest, RealDest) && Path == RealDest) return false; return true; } @@ -121,7 +121,8 @@ void ModuleDependencyCollector::writeFileMap() { // Explicitly set case sensitivity for the YAML writer. For that, find out // the sensitivity at the path where the headers all collected to. 
- VFSWriter.setCaseSensitivity(isCaseSensitivePath(VFSDir)); + VFSWriter.setCaseSensitivity( + isCaseSensitivePath(Canonicalizer.getFileSystem(), VFSDir)); // Do not rely on real path names when executing the crash reproducer scripts // since we only want to actually use the files we have on the VFS cache. @@ -153,7 +154,7 @@ std::error_code ModuleDependencyCollector::copyToRoot(StringRef Src, } else { // When collecting entries from input vfsoverlays, copy the external // contents into the cache but still map from the source. - if (!fs::exists(Dst)) + if (!Canonicalizer.getFileSystem().exists(Dst)) return std::error_code(); path::append(CacheDst, Dst); Paths.CopyFrom = Dst; diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index a7f7099..d6ba19a 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -2311,10 +2311,9 @@ _mm256_cvttps_epi32(__m256 __a) /// \param __a /// A 256-bit vector of [4 x double]. /// \returns A 64 bit double containing the first element of the input vector. -static __inline double __DEFAULT_FN_ATTRS -_mm256_cvtsd_f64(__m256d __a) -{ - return __a[0]; +static __inline double __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_cvtsd_f64(__m256d __a) { + return __a[0]; } /// Returns the first element of the input vector of [8 x i32]. @@ -2327,11 +2326,10 @@ _mm256_cvtsd_f64(__m256d __a) /// \param __a /// A 256-bit vector of [8 x i32]. /// \returns A 32 bit integer containing the first element of the input vector. -static __inline int __DEFAULT_FN_ATTRS -_mm256_cvtsi256_si32(__m256i __a) -{ - __v8si __b = (__v8si)__a; - return __b[0]; +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_cvtsi256_si32(__m256i __a) { + __v8si __b = (__v8si)__a; + return __b[0]; } /// Returns the first element of the input vector of [8 x float]. @@ -2344,10 +2342,9 @@ _mm256_cvtsi256_si32(__m256i __a) /// \param __a /// A 256-bit vector of [8 x float]. /// \returns A 32 bit float containing the first element of the input vector. -static __inline float __DEFAULT_FN_ATTRS -_mm256_cvtss_f32(__m256 __a) -{ - return __a[0]; +static __inline float __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_cvtss_f32(__m256 __a) { + return __a[0]; } /* Vector replicate */ diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp index 8605ba2..a2c6957 100644 --- a/clang/lib/Parse/ParseExprCXX.cpp +++ b/clang/lib/Parse/ParseExprCXX.cpp @@ -1299,7 +1299,7 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer( Diag(Tok, getLangOpts().CPlusPlus23 ? 
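With __DEFAULT_FN_ATTRS_CONSTEXPR on these element-extraction helpers they become usable in constant expressions; a quick compile-time check (assumes AVX is enabled and relies on clang's constexpr vector-literal support):

  #include <immintrin.h>
  constexpr __m256d v = {1.0, 2.0, 3.0, 4.0};
  static_assert(_mm256_cvtsd_f64(v) == 1.0, "extracts the first element");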
diag::warn_cxx20_compat_decl_attrs_on_lambda : diag::ext_decl_attrs_on_lambda) - << Tok.getIdentifierInfo() << Tok.isRegularKeywordAttribute(); + << Tok.isRegularKeywordAttribute() << Tok.getIdentifierInfo(); MaybeParseCXX11Attributes(D); } diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 02f3f10..04f29c8 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -2968,6 +2968,39 @@ OMPClause *Parser::ParseOpenMPSizesClause() { OpenLoc, CloseLoc); } +OMPClause *Parser::ParseOpenMPLoopRangeClause() { + SourceLocation ClauseNameLoc = ConsumeToken(); + SourceLocation FirstLoc, CountLoc; + + BalancedDelimiterTracker T(*this, tok::l_paren, tok::annot_pragma_openmp_end); + if (T.consumeOpen()) { + Diag(Tok, diag::err_expected) << tok::l_paren; + return nullptr; + } + + FirstLoc = Tok.getLocation(); + ExprResult FirstVal = ParseConstantExpression(); + if (!FirstVal.isUsable()) { + T.skipToEnd(); + return nullptr; + } + + ExpectAndConsume(tok::comma); + + CountLoc = Tok.getLocation(); + ExprResult CountVal = ParseConstantExpression(); + if (!CountVal.isUsable()) { + T.skipToEnd(); + return nullptr; + } + + T.consumeClose(); + + return Actions.OpenMP().ActOnOpenMPLoopRangeClause( + FirstVal.get(), CountVal.get(), ClauseNameLoc, T.getOpenLocation(), + FirstLoc, CountLoc, T.getCloseLocation()); +} + OMPClause *Parser::ParseOpenMPPermutationClause() { SourceLocation ClauseNameLoc, OpenLoc, CloseLoc; SmallVector<Expr *> ArgExprs; @@ -3473,6 +3506,9 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, } Clause = ParseOpenMPClause(CKind, WrongDirective); break; + case OMPC_looprange: + Clause = ParseOpenMPLoopRangeClause(); + break; default: break; } diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 1b66d83..8606227 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -983,10 +983,9 @@ static void DiagUninitUse(Sema &S, const VarDecl *VD, const UninitUse &Use, case UninitUse::AfterDecl: case UninitUse::AfterCall: S.Diag(VD->getLocation(), diag::warn_sometimes_uninit_var) - << VD->getDeclName() << IsCapturedByBlock - << (Use.getKind() == UninitUse::AfterDecl ? 4 : 5) - << const_cast<DeclContext*>(VD->getLexicalDeclContext()) - << VD->getSourceRange(); + << VD->getDeclName() << IsCapturedByBlock + << (Use.getKind() == UninitUse::AfterDecl ? 4 : 5) + << VD->getLexicalDeclContext() << VD->getSourceRange(); S.Diag(Use.getUser()->getBeginLoc(), diag::note_uninit_var_use) << IsCapturedByBlock << Use.getUser()->getSourceRange(); return; diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp index d238b79..dc6d232 100644 --- a/clang/lib/Sema/SemaConcept.cpp +++ b/clang/lib/Sema/SemaConcept.cpp @@ -193,7 +193,7 @@ DiagRecursiveConstraintEval(Sema &S, llvm::FoldingSetNodeID &ID, // Sema::InstantiatingTemplate::isAlreadyBeingInstantiated function. if (S.SatisfactionStackContains(Templ, ID)) { S.Diag(E->getExprLoc(), diag::err_constraint_depends_on_self) - << const_cast<Expr *>(E) << E->getSourceRange(); + << E << E->getSourceRange(); return true; } diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 9ef7a26..0069b08 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -18909,8 +18909,7 @@ ExprResult Sema::VerifyBitField(SourceLocation FieldLoc, // 'bool'. 
if (BitfieldIsOverwide && !FieldTy->isBooleanType() && FieldName) { Diag(FieldLoc, diag::warn_bitfield_width_exceeds_type_width) - << FieldName << toString(Value, 10) - << (unsigned)TypeWidth; + << FieldName << Value << (unsigned)TypeWidth; } } diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp index 552c929..a0483c3 100644 --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp @@ -1493,6 +1493,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) { case Stmt::OMPUnrollDirectiveClass: case Stmt::OMPReverseDirectiveClass: case Stmt::OMPInterchangeDirectiveClass: + case Stmt::OMPFuseDirectiveClass: case Stmt::OMPSingleDirectiveClass: case Stmt::OMPTargetDataDirectiveClass: case Stmt::OMPTargetDirectiveClass: diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 3b267c1..06b2529 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -16791,12 +16791,11 @@ ExprResult Sema::BuildVAArgExpr(SourceLocation BuiltinLoc, Expr *OrigExpr = E; bool IsMS = false; - // CUDA device code does not support varargs. + // CUDA device global function does not support varargs. if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { if (const FunctionDecl *F = dyn_cast<FunctionDecl>(CurContext)) { CUDAFunctionTarget T = CUDA().IdentifyTarget(F); - if (T == CUDAFunctionTarget::Global || T == CUDAFunctionTarget::Device || - T == CUDAFunctionTarget::HostDevice) + if (T == CUDAFunctionTarget::Global) return ExprError(Diag(E->getBeginLoc(), diag::err_va_arg_in_device)); } } @@ -20108,8 +20107,9 @@ static void DoMarkVarDeclReferenced( bool NeededForConstantEvaluation = isPotentiallyConstantEvaluatedContext(SemaRef) && UsableInConstantExpr; - bool NeedDefinition = - OdrUse == OdrUseContext::Used || NeededForConstantEvaluation; + bool NeedDefinition = OdrUse == OdrUseContext::Used || + NeededForConstantEvaluation || + Var->getType()->isUndeducedType(); assert(!isa<VarTemplatePartialSpecializationDecl>(Var) && "Can't instantiate a partial template specialization."); diff --git a/clang/lib/Sema/SemaOpenACCAtomic.cpp b/clang/lib/Sema/SemaOpenACCAtomic.cpp index a9319dc..ad21129 100644 --- a/clang/lib/Sema/SemaOpenACCAtomic.cpp +++ b/clang/lib/Sema/SemaOpenACCAtomic.cpp @@ -454,9 +454,7 @@ class AtomicOperandChecker { // If nothing matches, error out. 
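The narrowed CUDA check means device and host-device functions can now use va_arg at the Sema level, with only kernels keeping the hard error; a sketch (CUDA compilation assumed; whether the result is actually usable on a given GPU target is a separate question):

  #include <cstdarg>
  __device__ int first_int(va_list args) {
    return va_arg(args, int);   // previously err_va_arg_in_device, now accepted
  }
  // In a __global__ kernel the same va_arg use is still rejected.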
DiagnoseInvalidAtomic(BinInf->FoundExpr->getExprLoc(), SemaRef.PDiag(diag::note_acc_atomic_mismatch_operand) - << const_cast<Expr *>(AssignInf.LHS) - << const_cast<Expr *>(BinInf->LHS) - << const_cast<Expr *>(BinInf->RHS)); + << AssignInf.LHS << BinInf->LHS << BinInf->RHS); return IDACInfo::Fail(); } @@ -592,8 +590,7 @@ class AtomicOperandChecker { PartialDiagnostic PD = SemaRef.PDiag(diag::note_acc_atomic_mismatch_compound_operand) - << FirstKind << const_cast<Expr *>(FirstX) << SecondKind - << const_cast<Expr *>(SecondX); + << FirstKind << FirstX << SecondKind << SecondX; return DiagnoseInvalidAtomic(SecondX->getExprLoc(), PD); } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 48e06d1..0fa21e8 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -2490,7 +2490,8 @@ VarDecl *SemaOpenMP::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo, DSAStackTy::DSAVarData DVarTop = DSAStack->getTopDSA(D, DSAStack->isClauseParsingMode()); if (DVarTop.CKind != OMPC_unknown && isOpenMPPrivate(DVarTop.CKind) && - (!VD || VD->hasLocalStorage() || !DVarTop.AppliedToPointee)) + (!VD || VD->hasLocalStorage() || + !(DVarTop.AppliedToPointee && DVarTop.CKind != OMPC_reduction))) return VD ? VD : cast<VarDecl>(DVarTop.PrivateCopy->getDecl()); // Threadprivate variables must not be captured. if (isOpenMPThreadPrivate(DVarTop.CKind)) @@ -4569,6 +4570,7 @@ void SemaOpenMP::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, case OMPD_unroll: case OMPD_reverse: case OMPD_interchange: + case OMPD_fuse: case OMPD_assume: break; default: @@ -6410,6 +6412,10 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective( Res = ActOnOpenMPInterchangeDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc); break; + case OMPD_fuse: + Res = + ActOnOpenMPFuseDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc); + break; case OMPD_for: Res = ActOnOpenMPForDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); @@ -9488,7 +9494,9 @@ static bool checkOpenMPIterationSpace( // sharing attributes. VarsWithImplicitDSA.erase(LCDecl); - assert(isOpenMPLoopDirective(DKind) && "DSA for non-loop vars"); + assert((isOpenMPLoopDirective(DKind) || + isOpenMPCanonicalLoopSequenceTransformationDirective(DKind)) && + "DSA for non-loop vars"); // Check test-expr. HasErrors |= ISC.checkAndSetCond(For ? 
For->getCond() : CXXFor->getCond()); @@ -9916,7 +9924,8 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr, unsigned NumLoops = std::max(OrderedLoopCount, NestedLoopCount); SmallVector<LoopIterationSpace, 4> IterSpaces(NumLoops); if (!OMPLoopBasedDirective::doForAllLoops( - AStmt->IgnoreContainers(!isOpenMPLoopTransformationDirective(DKind)), + AStmt->IgnoreContainers( + !isOpenMPCanonicalLoopNestTransformationDirective(DKind)), SupportsNonPerfectlyNested, NumLoops, [DKind, &SemaRef, &DSA, NumLoops, NestedLoopCount, CollapseLoopCountExpr, OrderedLoopCountExpr, &VarsWithImplicitDSA, @@ -9938,8 +9947,7 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr, } return false; }, - [&SemaRef, - &Captures](OMPCanonicalLoopNestTransformationDirective *Transform) { + [&SemaRef, &Captures](OMPLoopTransformationDirective *Transform) { Stmt *DependentPreInits = Transform->getPreInits(); if (!DependentPreInits) return; @@ -9954,7 +9962,8 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr, auto *D = cast<VarDecl>(C); DeclRefExpr *Ref = buildDeclRefExpr( SemaRef, D, D->getType().getNonReferenceType(), - Transform->getBeginLoc()); + cast<OMPExecutableDirective>(Transform->getDirective()) + ->getBeginLoc()); Captures[Ref] = Ref; } } @@ -14404,10 +14413,34 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeSimdDirective( getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } +/// Updates OriginalInits by checking Transform against loop transformation +/// directives and appending their pre-inits if a match is found. +static void updatePreInits(OMPLoopTransformationDirective *Transform, + SmallVectorImpl<Stmt *> &PreInits) { + Stmt *Dir = Transform->getDirective(); + switch (Dir->getStmtClass()) { +#define STMT(CLASS, PARENT) +#define ABSTRACT_STMT(CLASS) +#define COMMON_OMP_LOOP_TRANSFORMATION(CLASS, PARENT) \ + case Stmt::CLASS##Class: \ + appendFlattenedStmtList(PreInits, \ + static_cast<const CLASS *>(Dir)->getPreInits()); \ + break; +#define OMPCANONICALLOOPNESTTRANSFORMATIONDIRECTIVE(CLASS, PARENT) \ + COMMON_OMP_LOOP_TRANSFORMATION(CLASS, PARENT) +#define OMPCANONICALLOOPSEQUENCETRANSFORMATIONDIRECTIVE(CLASS, PARENT) \ + COMMON_OMP_LOOP_TRANSFORMATION(CLASS, PARENT) +#include "clang/AST/StmtNodes.inc" +#undef COMMON_OMP_LOOP_TRANSFORMATION + default: + llvm_unreachable("Not a loop transformation"); + } +} + bool SemaOpenMP::checkTransformableLoopNest( OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops, SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers, - Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits) { + Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *>> &OriginalInits) { OriginalInits.emplace_back(); bool Result = OMPLoopBasedDirective::doForAllLoops( AStmt->IgnoreContainers(), /*TryImperfectlyNestedLoops=*/false, NumLoops, @@ -14433,29 +14466,268 @@ bool SemaOpenMP::checkTransformableLoopNest( OriginalInits.emplace_back(); return false; }, - [&OriginalInits](OMPLoopBasedDirective *Transform) { - Stmt *DependentPreInits; - if (auto *Dir = dyn_cast<OMPTileDirective>(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast<OMPStripeDirective>(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast<OMPUnrollDirective>(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast<OMPReverseDirective>(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = 
dyn_cast<OMPInterchangeDirective>(Transform)) - DependentPreInits = Dir->getPreInits(); - else - llvm_unreachable("Unhandled loop transformation"); - - appendFlattenedStmtList(OriginalInits.back(), DependentPreInits); + [&OriginalInits](OMPLoopTransformationDirective *Transform) { + updatePreInits(Transform, OriginalInits.back()); }); assert(OriginalInits.back().empty() && "No preinit after innermost loop"); OriginalInits.pop_back(); return Result; } -/// Add preinit statements that need to be propageted from the selected loop. +/// Counts the total number of OpenMP canonical nested loops, including the +/// outermost loop (the original loop). PRECONDITION of this visitor is that it +/// must be invoked from the original loop to be analyzed. The traversal stops +/// for Decl's and Expr's given that they may contain inner loops that must not +/// be counted. +/// +/// Example AST structure for the code: +/// +/// int main() { +/// #pragma omp fuse +/// { +/// for (int i = 0; i < 100; i++) { <-- Outer loop +/// []() { +/// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP (1) +/// }; +/// for(int j = 0; j < 5; ++j) {} <-- Inner loop +/// } +/// for (int r = 0; i < 100; i++) { <-- Outer loop +/// struct LocalClass { +/// void bar() { +/// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP (2) +/// } +/// }; +/// for(int k = 0; k < 10; ++k) {} <-- Inner loop +/// {x = 5; for(k = 0; k < 10; ++k) x += k; x}; <-- NOT A LOOP (3) +/// } +/// } +/// } +/// (1) because in a different function (here: a lambda) +/// (2) because in a different function (here: class method) +/// (3) because considered to be intervening-code of non-perfectly nested loop +/// Result: Loop 'i' contains 2 loops, Loop 'r' also contains 2 loops. +class NestedLoopCounterVisitor final : public DynamicRecursiveASTVisitor { +private: + unsigned NestedLoopCount = 0; + +public: + explicit NestedLoopCounterVisitor() = default; + + unsigned getNestedLoopCount() const { return NestedLoopCount; } + + bool VisitForStmt(ForStmt *FS) override { + ++NestedLoopCount; + return true; + } + + bool VisitCXXForRangeStmt(CXXForRangeStmt *FRS) override { + ++NestedLoopCount; + return true; + } + + bool TraverseStmt(Stmt *S) override { + if (!S) + return true; + + // Skip traversal of all expressions, including special cases like + // LambdaExpr, StmtExpr, BlockExpr, and RequiresExpr. These expressions + // may contain inner statements (and even loops), but they are not part + // of the syntactic body of the surrounding loop structure. + // Therefore must not be counted. + if (isa<Expr>(S)) + return true; + + // Only recurse into CompoundStmt (block {}) and loop bodies. + if (isa<CompoundStmt, ForStmt, CXXForRangeStmt>(S)) { + return DynamicRecursiveASTVisitor::TraverseStmt(S); + } + + // Stop traversal of the rest of statements, that break perfect + // loop nesting, such as control flow (IfStmt, SwitchStmt...). + return true; + } + + bool TraverseDecl(Decl *D) override { + // Stop in the case of finding a declaration, it is not important + // in order to find nested loops (Possible CXXRecordDecl, RecordDecl, + // FunctionDecl...). + return true; + } +}; + +bool SemaOpenMP::analyzeLoopSequence(Stmt *LoopSeqStmt, + LoopSequenceAnalysis &SeqAnalysis, + ASTContext &Context, + OpenMPDirectiveKind Kind) { + VarsWithInheritedDSAType TmpDSA; + // Helper Lambda to handle storing initialization and body statements for + // both ForStmt and CXXForRangeStmt. 
+ auto StoreLoopStatements = [](LoopAnalysis &Analysis, Stmt *LoopStmt) { + if (auto *For = dyn_cast<ForStmt>(LoopStmt)) { + Analysis.OriginalInits.push_back(For->getInit()); + Analysis.TheForStmt = For; + } else { + auto *CXXFor = cast<CXXForRangeStmt>(LoopStmt); + Analysis.OriginalInits.push_back(CXXFor->getBeginStmt()); + Analysis.TheForStmt = CXXFor; + } + }; + + // Helper lambda functions to encapsulate the processing of different + // derivations of the canonical loop sequence grammar + // Modularized code for handling loop generation and transformations. + auto AnalyzeLoopGeneration = [&](Stmt *Child) { + auto *LoopTransform = cast<OMPLoopTransformationDirective>(Child); + Stmt *TransformedStmt = LoopTransform->getTransformedStmt(); + unsigned NumGeneratedTopLevelLoops = + LoopTransform->getNumGeneratedTopLevelLoops(); + // Handle the case where transformed statement is not available due to + // dependent contexts + if (!TransformedStmt) { + if (NumGeneratedTopLevelLoops > 0) { + SeqAnalysis.LoopSeqSize += NumGeneratedTopLevelLoops; + return true; + } + // Unroll full (0 loops produced) + Diag(Child->getBeginLoc(), diag::err_omp_not_for) + << 0 << getOpenMPDirectiveName(Kind); + return false; + } + // Handle loop transformations with multiple loop nests + // Unroll full + if (!NumGeneratedTopLevelLoops) { + Diag(Child->getBeginLoc(), diag::err_omp_not_for) + << 0 << getOpenMPDirectiveName(Kind); + return false; + } + // Loop transformatons such as split or loopranged fuse + if (NumGeneratedTopLevelLoops > 1) { + // Get the preinits related to this loop sequence generating + // loop transformation (i.e loopranged fuse, split...) + // These preinits differ slightly from regular inits/pre-inits related + // to single loop generating loop transformations (interchange, unroll) + // given that they are not bounded to a particular loop nest + // so they need to be treated independently + updatePreInits(LoopTransform, SeqAnalysis.LoopSequencePreInits); + return analyzeLoopSequence(TransformedStmt, SeqAnalysis, Context, Kind); + } + // Vast majority: (Tile, Unroll, Stripe, Reverse, Interchange, Fuse all) + // Process the transformed loop statement + LoopAnalysis &NewTransformedSingleLoop = + SeqAnalysis.Loops.emplace_back(Child); + unsigned IsCanonical = checkOpenMPLoop( + Kind, nullptr, nullptr, TransformedStmt, SemaRef, *DSAStack, TmpDSA, + NewTransformedSingleLoop.HelperExprs); + + if (!IsCanonical) + return false; + + StoreLoopStatements(NewTransformedSingleLoop, TransformedStmt); + updatePreInits(LoopTransform, NewTransformedSingleLoop.TransformsPreInits); + + SeqAnalysis.LoopSeqSize++; + return true; + }; + + // Modularized code for handling regular canonical loops. + auto AnalyzeRegularLoop = [&](Stmt *Child) { + LoopAnalysis &NewRegularLoop = SeqAnalysis.Loops.emplace_back(Child); + unsigned IsCanonical = + checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack, + TmpDSA, NewRegularLoop.HelperExprs); + + if (!IsCanonical) + return false; + + StoreLoopStatements(NewRegularLoop, Child); + NestedLoopCounterVisitor NLCV; + NLCV.TraverseStmt(Child); + return true; + }; + + // High level grammar validation. + for (Stmt *Child : LoopSeqStmt->children()) { + if (!Child) + continue; + // Skip over non-loop-sequence statements. + if (!LoopSequenceAnalysis::isLoopSequenceDerivation(Child)) { + Child = Child->IgnoreContainers(); + // Ignore empty compound statement. 
+ if (!Child) + continue; + // In the case of a nested loop sequence ignoring containers would not + // be enough, a recurisve transversal of the loop sequence is required. + if (isa<CompoundStmt>(Child)) { + if (!analyzeLoopSequence(Child, SeqAnalysis, Context, Kind)) + return false; + // Already been treated, skip this children + continue; + } + } + // Regular loop sequence handling. + if (LoopSequenceAnalysis::isLoopSequenceDerivation(Child)) { + if (LoopAnalysis::isLoopTransformation(Child)) { + if (!AnalyzeLoopGeneration(Child)) + return false; + // AnalyzeLoopGeneration updates SeqAnalysis.LoopSeqSize accordingly. + } else { + if (!AnalyzeRegularLoop(Child)) + return false; + SeqAnalysis.LoopSeqSize++; + } + } else { + // Report error for invalid statement inside canonical loop sequence. + Diag(Child->getBeginLoc(), diag::err_omp_not_for) + << 0 << getOpenMPDirectiveName(Kind); + return false; + } + } + return true; +} + +bool SemaOpenMP::checkTransformableLoopSequence( + OpenMPDirectiveKind Kind, Stmt *AStmt, LoopSequenceAnalysis &SeqAnalysis, + ASTContext &Context) { + // Following OpenMP 6.0 API Specification, a Canonical Loop Sequence follows + // the grammar: + // + // canonical-loop-sequence: + // { + // loop-sequence+ + // } + // where loop-sequence can be any of the following: + // 1. canonical-loop-sequence + // 2. loop-nest + // 3. loop-sequence-generating-construct (i.e OMPLoopTransformationDirective) + // + // To recognise and traverse this structure the helper function + // analyzeLoopSequence serves as the recurisve entry point + // and tries to match the input AST to the canonical loop sequence grammar + // structure. This function will perform both a semantic and syntactical + // analysis of the given statement according to OpenMP 6.0 definition of + // the aforementioned canonical loop sequence. + + // We expect an outer compound statement. + if (!isa<CompoundStmt>(AStmt)) { + Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence) + << getOpenMPDirectiveName(Kind); + return false; + } + + // Recursive entry point to process the main loop sequence + if (!analyzeLoopSequence(AStmt, SeqAnalysis, Context, Kind)) + return false; + + // Diagnose an empty loop sequence. + if (!SeqAnalysis.LoopSeqSize) { + Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence) + << getOpenMPDirectiveName(Kind); + return false; + } + return true; +} + +/// Add preinit statements that need to be propagated from the selected loop. static void addLoopPreInits(ASTContext &Context, OMPLoopBasedDirective::HelperExprs &LoopHelper, Stmt *LoopStmt, ArrayRef<Stmt *> OriginalInit, @@ -14540,7 +14812,7 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses, // Verify and diagnose loop nest. SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers(NumLoops); Stmt *Body = nullptr; - SmallVector<SmallVector<Stmt *, 0>, 4> OriginalInits; + SmallVector<SmallVector<Stmt *>, 4> OriginalInits; if (!checkTransformableLoopNest(OMPD_tile, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -14817,7 +15089,7 @@ StmtResult SemaOpenMP::ActOnOpenMPStripeDirective(ArrayRef<OMPClause *> Clauses, // Verify and diagnose loop nest. 
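To make the accepted shape concrete, a sketch of inputs this analysis is written to accept and reject (hypothetical loops; the diagnostic named is the one used above):

  void ok() {
  #pragma omp fuse
    {                                   // canonical loop sequence: braces are required
      for (int i = 0; i < 8; ++i) { }   // loop-nest
  #pragma omp unroll partial(2)         // loop-sequence-generating construct
      for (int j = 0; j < 8; ++j) { }
    }
  }

  void bad() {
  #pragma omp fuse
    {
      for (int i = 0; i < 8; ++i) { }
      int x = 0;                        // not a loop: diagnosed via err_omp_not_for
    }
  }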
SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers(NumLoops); Stmt *Body = nullptr; - SmallVector<SmallVector<Stmt *, 0>, 4> OriginalInits; + SmallVector<SmallVector<Stmt *>, 4> OriginalInits; if (!checkTransformableLoopNest(OMPD_stripe, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -15078,7 +15350,7 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef<OMPClause *> Clauses, Stmt *Body = nullptr; SmallVector<OMPLoopBasedDirective::HelperExprs, NumLoops> LoopHelpers( NumLoops); - SmallVector<SmallVector<Stmt *, 0>, NumLoops + 1> OriginalInits; + SmallVector<SmallVector<Stmt *>, NumLoops + 1> OriginalInits; if (!checkTransformableLoopNest(OMPD_unroll, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -15348,7 +15620,7 @@ StmtResult SemaOpenMP::ActOnOpenMPReverseDirective(Stmt *AStmt, Stmt *Body = nullptr; SmallVector<OMPLoopBasedDirective::HelperExprs, NumLoops> LoopHelpers( NumLoops); - SmallVector<SmallVector<Stmt *, 0>, NumLoops + 1> OriginalInits; + SmallVector<SmallVector<Stmt *>, NumLoops + 1> OriginalInits; if (!checkTransformableLoopNest(OMPD_reverse, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -15540,7 +15812,7 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective( // Verify and diagnose loop nest. SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers(NumLoops); Stmt *Body = nullptr; - SmallVector<SmallVector<Stmt *, 0>, 2> OriginalInits; + SmallVector<SmallVector<Stmt *>, 2> OriginalInits; if (!checkTransformableLoopNest(OMPD_interchange, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -15716,6 +15988,484 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective( buildPreInits(Context, PreInits)); } +StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses, + Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc) { + + ASTContext &Context = getASTContext(); + DeclContext *CurrContext = SemaRef.CurContext; + Scope *CurScope = SemaRef.getCurScope(); + CaptureVars CopyTransformer(SemaRef); + + // Ensure the structured block is not empty + if (!AStmt) + return StmtError(); + + // Defer transformation in dependent contexts + // The NumLoopNests argument is set to a placeholder 1 (even though + // using looprange fuse could yield up to 3 top level loop nests) + // because a dependent context could prevent determining its true value + if (CurrContext->isDependentContext()) + return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, + /* NumLoops */ 1, AStmt, nullptr, nullptr); + + // Validate that the potential loop sequence is transformable for fusion + // Also collect the HelperExprs, Loop Stmts, Inits, and Number of loops + LoopSequenceAnalysis SeqAnalysis; + if (!checkTransformableLoopSequence(OMPD_fuse, AStmt, SeqAnalysis, Context)) + return StmtError(); + + // SeqAnalysis.LoopSeqSize exists mostly to handle dependent contexts, + // otherwise it must be the same as SeqAnalysis.Loops.size(). 
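The looprange bookkeeping in ActOnOpenMPFuseDirective (continued below) reduces to simple index arithmetic; a worked example with a hypothetical sequence of four sibling loops:

  #pragma omp fuse looprange(2, 2)
  {
    for (int a = 0; a < 8; ++a) { }   // loop 1: left as-is
    for (int b = 0; b < 8; ++b) { }   // loop 2: fused
    for (int c = 0; c < 8; ++c) { }   // loop 3: fused
    for (int d = 0; d < 8; ++d) { }   // loop 4: left as-is
  }
  // FirstVal = 2, CountVal = 2, so LastVal = FirstVal + CountVal - 1 = 3,
  // which satisfies FirstVal + CountVal - 1 <= LoopSeqSize (3 <= 4).
  // NumGeneratedTopLevelLoops = LoopSeqSize - CountVal + 1 = 3:
  // loop 1, the fused (2,3) nest, and loop 4.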
+ assert(SeqAnalysis.LoopSeqSize == SeqAnalysis.Loops.size() && + "Inconsistent size of the loop sequence and the number of loops " + "found in the sequence"); + + // Handle clauses, which can be any of the following: [looprange, apply] + const auto *LRC = + OMPExecutableDirective::getSingleClause<OMPLoopRangeClause>(Clauses); + + // The clause arguments are invalidated if any error arises, + // such as non-constant or non-positive arguments. + if (LRC && (!LRC->getFirst() || !LRC->getCount())) + return StmtError(); + + // Delayed semantic check of the LoopRange constraint. + // Evaluates the loop range arguments and returns the first and count values. + auto EvaluateLoopRangeArguments = [&Context](Expr *First, Expr *Count, + uint64_t &FirstVal, + uint64_t &CountVal) { + llvm::APSInt FirstInt = First->EvaluateKnownConstInt(Context); + llvm::APSInt CountInt = Count->EvaluateKnownConstInt(Context); + FirstVal = FirstInt.getZExtValue(); + CountVal = CountInt.getZExtValue(); + }; + + // OpenMP [6.0, Restrictions] + // first + count - 1 must not evaluate to a value greater than the + // loop sequence length of the associated canonical loop sequence. + auto ValidLoopRange = [](uint64_t FirstVal, uint64_t CountVal, + unsigned NumLoops) -> bool { + return FirstVal + CountVal - 1 <= NumLoops; + }; + uint64_t FirstVal = 1, CountVal = 0, LastVal = SeqAnalysis.LoopSeqSize; + + // Validates the loop range after evaluating the semantic information + // and ensures that the range is valid for the given loop sequence size. + // Expressions are evaluated at compile time to obtain constant values. + if (LRC) { + EvaluateLoopRangeArguments(LRC->getFirst(), LRC->getCount(), FirstVal, + CountVal); + if (CountVal == 1) + SemaRef.Diag(LRC->getCountLoc(), diag::warn_omp_redundant_fusion) + << getOpenMPDirectiveName(OMPD_fuse); + + if (!ValidLoopRange(FirstVal, CountVal, SeqAnalysis.LoopSeqSize)) { + SemaRef.Diag(LRC->getFirstLoc(), diag::err_omp_invalid_looprange) + << getOpenMPDirectiveName(OMPD_fuse) << FirstVal + << (FirstVal + CountVal - 1) << SeqAnalysis.LoopSeqSize; + return StmtError(); + } + + LastVal = FirstVal + CountVal - 1; + } + + // Complete fusion generates a single canonical loop nest. + // However, the looprange clause may generate several loop nests. + unsigned NumGeneratedTopLevelLoops = + LRC ? SeqAnalysis.LoopSeqSize - CountVal + 1 : 1; + + // Emit a warning for redundant loop fusion when the sequence contains only + // one loop. + if (SeqAnalysis.LoopSeqSize == 1) + SemaRef.Diag(AStmt->getBeginLoc(), diag::warn_omp_redundant_fusion) + << getOpenMPDirectiveName(OMPD_fuse); + + // Select the type with the largest bit width among all induction variables. + QualType IVType = + SeqAnalysis.Loops[FirstVal - 1].HelperExprs.IterationVarRef->getType(); + for (unsigned I : llvm::seq<unsigned>(FirstVal, LastVal)) { + QualType CurrentIVType = + SeqAnalysis.Loops[I].HelperExprs.IterationVarRef->getType(); + if (Context.getTypeSize(CurrentIVType) > Context.getTypeSize(IVType)) { + IVType = CurrentIVType; + } + } + uint64_t IVBitWidth = Context.getIntWidth(IVType); + + // Create pre-init declarations for the lower bounds, upper bounds, + // strides, and num-iterations of every top-level loop in the fusion. + SmallVector<VarDecl *, 4> LBVarDecls; + SmallVector<VarDecl *, 4> STVarDecls; + SmallVector<VarDecl *, 4> NIVarDecls; + SmallVector<VarDecl *, 4> UBVarDecls; + SmallVector<VarDecl *, 4> IVVarDecls; + + // Helper lambda to create variables for bounds, strides, and other + // expressions.
Generates both the variable declaration and the corresponding + // initialization statement. + auto CreateHelperVarAndStmt = + [&, &SemaRef = SemaRef](Expr *ExprToCopy, const std::string &BaseName, + unsigned I, bool NeedsNewVD = false) { + Expr *TransformedExpr = + AssertSuccess(CopyTransformer.TransformExpr(ExprToCopy)); + if (!TransformedExpr) + return std::pair<VarDecl *, StmtResult>(nullptr, StmtError()); + + auto Name = (Twine(".omp.") + BaseName + std::to_string(I)).str(); + + VarDecl *VD; + if (NeedsNewVD) { + VD = buildVarDecl(SemaRef, SourceLocation(), IVType, Name); + SemaRef.AddInitializerToDecl(VD, TransformedExpr, false); + } else { + // Reuse the transformed variable, giving it a unique name. + DeclRefExpr *DRE = cast<DeclRefExpr>(TransformedExpr); + VD = cast<VarDecl>(DRE->getDecl()); + VD->setDeclName(&SemaRef.PP.getIdentifierTable().get(Name)); + } + // Create the corresponding declaration statement. + StmtResult DeclStmt = new (Context) class DeclStmt( + DeclGroupRef(VD), SourceLocation(), SourceLocation()); + return std::make_pair(VD, DeclStmt); + }; + + // PreInits holds a sequence of variable declarations that must be executed + // before the fused loop begins. These include bounds, strides, and other + // helper variables required for the transformation. Other loop transforms + // also contain their own preinits. + SmallVector<Stmt *> PreInits; + + // Update the general preinits using the preinits generated by loop sequence + // generating loop transformations. These preinits differ slightly from + // single-loop transformation preinits, as they can be detached from a + // specific loop inside multiple generated loop nests. This happens + // because certain helper variables, like '.omp.fuse.max', are introduced to + // handle fused iteration spaces and may not be directly tied to a single + // original loop. The preinit structure must ensure that hidden variables + // like '.omp.fuse.max' are still properly handled. + // Transformations that apply this concept: Loopranged Fuse, Split. + llvm::append_range(PreInits, SeqAnalysis.LoopSequencePreInits); + + // Process each single loop to generate and collect declarations + // and statements for all helper expressions related to + // particular single loop nests. + + // Also, in the case of the fused loops, we keep track of their original + // inits by appending them to their preinits statement, and, in the case of + // transformations, we also append their preinits (which contain the original + // loop initialization statement or other statements). + + // First, we need to set TransformIndex to match the beginning of the + // looprange section. + unsigned int TransformIndex = 0; + for (unsigned I : llvm::seq<unsigned>(FirstVal - 1)) { + if (SeqAnalysis.Loops[I].isLoopTransformation()) + ++TransformIndex; + } + + for (unsigned int I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) { + if (SeqAnalysis.Loops[I].isRegularLoop()) { + addLoopPreInits(Context, SeqAnalysis.Loops[I].HelperExprs, + SeqAnalysis.Loops[I].TheForStmt, + SeqAnalysis.Loops[I].OriginalInits, PreInits); + } else if (SeqAnalysis.Loops[I].isLoopTransformation()) { + // For transformed loops, insert both pre-inits and original inits. + // Order matters: pre-inits may define variables used in the original + // inits such as upper bounds...
+ SmallVector<Stmt *> &TransformPreInit = + SeqAnalysis.Loops[TransformIndex++].TransformsPreInits; + llvm::append_range(PreInits, TransformPreInit); + + addLoopPreInits(Context, SeqAnalysis.Loops[I].HelperExprs, + SeqAnalysis.Loops[I].TheForStmt, + SeqAnalysis.Loops[I].OriginalInits, PreInits); + } + auto [UBVD, UBDStmt] = + CreateHelperVarAndStmt(SeqAnalysis.Loops[I].HelperExprs.UB, "ub", J); + auto [LBVD, LBDStmt] = + CreateHelperVarAndStmt(SeqAnalysis.Loops[I].HelperExprs.LB, "lb", J); + auto [STVD, STDStmt] = + CreateHelperVarAndStmt(SeqAnalysis.Loops[I].HelperExprs.ST, "st", J); + auto [NIVD, NIDStmt] = CreateHelperVarAndStmt( + SeqAnalysis.Loops[I].HelperExprs.NumIterations, "ni", J, true); + auto [IVVD, IVDStmt] = CreateHelperVarAndStmt( + SeqAnalysis.Loops[I].HelperExprs.IterationVarRef, "iv", J); + + assert(LBVD && STVD && NIVD && IVVD && + "OpenMP Fuse Helper variables creation failed"); + + UBVarDecls.push_back(UBVD); + LBVarDecls.push_back(LBVD); + STVarDecls.push_back(STVD); + NIVarDecls.push_back(NIVD); + IVVarDecls.push_back(IVVD); + + PreInits.push_back(LBDStmt.get()); + PreInits.push_back(STDStmt.get()); + PreInits.push_back(NIDStmt.get()); + PreInits.push_back(IVDStmt.get()); + } + + auto MakeVarDeclRef = [&SemaRef = this->SemaRef](VarDecl *VD) { + return buildDeclRefExpr(SemaRef, VD, VD->getType(), VD->getLocation(), + false); + }; + + // Next, the final fused loop is created, + // which has the following shape (considering the selected loops): + // + // for (fuse.index = 0; fuse.index < max(ni0, ni1..., nik); ++fuse.index) { + // if (fuse.index < ni0){ + // iv0 = lb0 + st0 * fuse.index; + // original.index0 = iv0 + // body(0); + // } + // if (fuse.index < ni1){ + // iv1 = lb1 + st1 * fuse.index; + // original.index1 = iv1 + // body(1); + // } + // + // ... + // + // if (fuse.index < nik){ + // ivk = lbk + stk * fuse.index; + // original.indexk = ivk + // body(k); + // } + // } + + // 1. Create the initialized fuse index. + StringRef IndexName = ".omp.fuse.index"; + Expr *InitVal = IntegerLiteral::Create(Context, llvm::APInt(IVBitWidth, 0), + IVType, SourceLocation()); + VarDecl *IndexDecl = + buildVarDecl(SemaRef, {}, IVType, IndexName, nullptr, nullptr); + SemaRef.AddInitializerToDecl(IndexDecl, InitVal, false); + StmtResult InitStmt = new (Context) + DeclStmt(DeclGroupRef(IndexDecl), SourceLocation(), SourceLocation()); + + if (!InitStmt.isUsable()) + return StmtError(); + + auto MakeIVRef = [&SemaRef = this->SemaRef, IndexDecl, IVType, + Loc = InitVal->getExprLoc()]() { + return buildDeclRefExpr(SemaRef, IndexDecl, IVType, Loc, false); + }; + + // 2. Iteratively compute the max number of logical iterations Max(NI_1, NI_2, + // ..., NI_k) + // + // This loop accumulates the maximum value across multiple expressions, + // ensuring each step constructs a unique AST node for correctness. By using + // intermediate temporary variables and conditional operators, we maintain + // distinct nodes and avoid duplicating subtrees. For instance, for max(a, b, c): + // omp.temp1 = a + // omp.temp2 = max(omp.temp1, b) + // omp.fuse.max = max(omp.temp2, c) + + ExprResult MaxExpr; + // I ranges over the loops in the sequence that we fuse.
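As a rough sketch (not part of the patch) of what this accumulation, together with the .omp.fuse.max declaration in step 3 below, amounts to for three fused loops: ni0, ni1, and ni2 are hypothetical stand-ins for the per-loop .omp.ni* iteration counts created above, and the underscored names stand in for the dotted .omp.* names the compiler actually emits:

    unsigned long fuse_max_of3(unsigned long ni0, unsigned long ni1,
                               unsigned long ni2) {
      unsigned long omp_temp_1 = ni0;                                   // .omp.temp.1
      unsigned long omp_temp_2 = omp_temp_1 > ni1 ? omp_temp_1 : ni1;   // .omp.temp.2
      unsigned long omp_fuse_max = omp_temp_2 > ni2 ? omp_temp_2 : ni2; // .omp.fuse.max
      return omp_fuse_max;
    }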
+ for (unsigned I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) { + DeclRefExpr *NIRef = MakeVarDeclRef(NIVarDecls[J]); + QualType NITy = NIRef->getType(); + + if (MaxExpr.isUnset()) { + // Initialize MaxExpr with the first NI expression + MaxExpr = NIRef; + } else { + // Create a new accumulator variable t_i = MaxExpr + std::string TempName = (Twine(".omp.temp.") + Twine(J)).str(); + VarDecl *TempDecl = + buildVarDecl(SemaRef, {}, NITy, TempName, nullptr, nullptr); + TempDecl->setInit(MaxExpr.get()); + DeclRefExpr *TempRef = + buildDeclRefExpr(SemaRef, TempDecl, NITy, SourceLocation(), false); + DeclRefExpr *TempRef2 = + buildDeclRefExpr(SemaRef, TempDecl, NITy, SourceLocation(), false); + // Add a DeclStmt to PreInits to ensure the variable is declared. + StmtResult TempStmt = new (Context) + DeclStmt(DeclGroupRef(TempDecl), SourceLocation(), SourceLocation()); + + if (!TempStmt.isUsable()) + return StmtError(); + PreInits.push_back(TempStmt.get()); + + // Build MaxExpr <- (MaxExpr > NIRef ? MaxExpr : NIRef) + ExprResult Comparison = + SemaRef.BuildBinOp(nullptr, SourceLocation(), BO_GT, TempRef, NIRef); + // Handle any errors in Comparison creation + if (!Comparison.isUsable()) + return StmtError(); + + DeclRefExpr *NIRef2 = MakeVarDeclRef(NIVarDecls[J]); + // Update MaxExpr using a conditional expression to hold the max value + MaxExpr = new (Context) ConditionalOperator( + Comparison.get(), SourceLocation(), TempRef2, SourceLocation(), + NIRef2->getExprStmt(), NITy, VK_LValue, OK_Ordinary); + + if (!MaxExpr.isUsable()) + return StmtError(); + } + } + if (!MaxExpr.isUsable()) + return StmtError(); + + // 3. Declare the max variable + const std::string MaxName = Twine(".omp.fuse.max").str(); + VarDecl *MaxDecl = + buildVarDecl(SemaRef, {}, IVType, MaxName, nullptr, nullptr); + MaxDecl->setInit(MaxExpr.get()); + DeclRefExpr *MaxRef = buildDeclRefExpr(SemaRef, MaxDecl, IVType, {}, false); + StmtResult MaxStmt = new (Context) + DeclStmt(DeclGroupRef(MaxDecl), SourceLocation(), SourceLocation()); + + if (MaxStmt.isInvalid()) + return StmtError(); + PreInits.push_back(MaxStmt.get()); + + // 4. Create condition Expr: index < n_max + ExprResult CondExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT, + MakeIVRef(), MaxRef); + if (!CondExpr.isUsable()) + return StmtError(); + + // 5. Increment Expr: ++index + ExprResult IncrExpr = + SemaRef.BuildUnaryOp(CurScope, SourceLocation(), UO_PreInc, MakeIVRef()); + if (!IncrExpr.isUsable()) + return StmtError(); + + // 6. Build the Fused Loop Body + // The final fused loop iterates over the maximum logical range. Inside the + // loop, each original loop's index is calculated dynamically, and its body + // is executed conditionally.
+ // + // Each sub-loop's body is guarded by a conditional statement to ensure + // it executes only within its logical iteration range: + // + // if (fuse.index < ni_k){ + // iv_k = lb_k + st_k * fuse.index; + // original.index = iv_k + // body(k); + // } + + CompoundStmt *FusedBody = nullptr; + SmallVector<Stmt *, 4> FusedBodyStmts; + for (unsigned I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) { + // Assignment of the original sub-loop index to compute the logical index + // IV_k = LB_k + omp.fuse.index * ST_k + ExprResult IdxExpr = + SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Mul, + MakeVarDeclRef(STVarDecls[J]), MakeIVRef()); + if (!IdxExpr.isUsable()) + return StmtError(); + IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Add, + MakeVarDeclRef(LBVarDecls[J]), IdxExpr.get()); + + if (!IdxExpr.isUsable()) + return StmtError(); + IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Assign, + MakeVarDeclRef(IVVarDecls[J]), IdxExpr.get()); + if (!IdxExpr.isUsable()) + return StmtError(); + + // Update the original i_k = IV_k + SmallVector<Stmt *, 4> BodyStmts; + BodyStmts.push_back(IdxExpr.get()); + llvm::append_range(BodyStmts, SeqAnalysis.Loops[I].HelperExprs.Updates); + + // If the loop is a CXXForRangeStmt, then the iterator variable is needed + if (auto *SourceCXXFor = + dyn_cast<CXXForRangeStmt>(SeqAnalysis.Loops[I].TheForStmt)) + BodyStmts.push_back(SourceCXXFor->getLoopVarStmt()); + + Stmt *Body = + (isa<ForStmt>(SeqAnalysis.Loops[I].TheForStmt)) + ? cast<ForStmt>(SeqAnalysis.Loops[I].TheForStmt)->getBody() + : cast<CXXForRangeStmt>(SeqAnalysis.Loops[I].TheForStmt)->getBody(); + BodyStmts.push_back(Body); + + CompoundStmt *CombinedBody = + CompoundStmt::Create(Context, BodyStmts, FPOptionsOverride(), + SourceLocation(), SourceLocation()); + ExprResult Condition = + SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT, MakeIVRef(), + MakeVarDeclRef(NIVarDecls[J])); + + if (!Condition.isUsable()) + return StmtError(); + + IfStmt *IfStatement = IfStmt::Create( + Context, SourceLocation(), IfStatementKind::Ordinary, nullptr, nullptr, + Condition.get(), SourceLocation(), SourceLocation(), CombinedBody, + SourceLocation(), nullptr); + + FusedBodyStmts.push_back(IfStatement); + } + FusedBody = CompoundStmt::Create(Context, FusedBodyStmts, FPOptionsOverride(), + SourceLocation(), SourceLocation()); + + // 7. Construct the final fused loop + ForStmt *FusedForStmt = new (Context) + ForStmt(Context, InitStmt.get(), CondExpr.get(), nullptr, IncrExpr.get(), + FusedBody, InitStmt.get()->getBeginLoc(), SourceLocation(), + IncrExpr.get()->getEndLoc()); + + // In the case of looprange, the result of fuse won't simply + // be a single loop (ForStmt), but rather a loop sequence + // (CompoundStmt) of 3 parts: the pre-fusion loops, the fused loop + // and the post-fusion loops, preserving its original order. + // + // Note: If the looprange clause produces a single fused loop nest, then + // this compound statement wrapper is unnecessary (therefore this + // treatment is skipped). + + Stmt *FusionStmt = FusedForStmt; + if (LRC && CountVal != SeqAnalysis.LoopSeqSize) { + SmallVector<Stmt *, 4> FinalLoops; + + // Reset the transform index + TransformIndex = 0; + + // Collect all non-fused loops before and after the fused region. + // Pre-fusion and post-fusion loops are inserted in order exploiting their + // symmetry, along with their corresponding transformation pre-inits if + // needed. The fused loop is added between the two regions.
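Tying this back to the hypothetical looprange(2, 2) example sketched earlier (again illustrative only; fuse_index stands in for the generated .omp.fuse.index variable, and the .omp.* bound/stride helpers are folded into the literal bounds for readability), the statement built here has roughly this shape:

    {                                  // CompoundStmt wrapping the resulting sequence
      for (int i = 0; i < n; ++i)      // pre-fusion loop, kept as-is
        consumer(i);
      for (int fuse_index = 0; fuse_index < 2 * n; ++fuse_index) { // .omp.fuse.max == 2 * n
        if (fuse_index < n)            // guarded body of loop 2
          consumer(fuse_index);
        if (fuse_index < 2 * n)        // guarded body of loop 3
          consumer(fuse_index);
      }
      for (int l = 0; l < n; ++l)      // post-fusion loop, kept as-is
        consumer(l);
    }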
+ for (unsigned I : llvm::seq<unsigned>(SeqAnalysis.LoopSeqSize)) { + if (I >= FirstVal - 1 && I < FirstVal + CountVal - 1) { + // Update the Transformation counter to skip already treated + // loop transformations + if (!SeqAnalysis.Loops[I].isLoopTransformation()) + ++TransformIndex; + continue; + } + + // No need to handle: + // Regular loops: they are kept intact as-is. + // Loop-sequence-generating transformations: already handled earlier. + // Only TransformSingleLoop requires inserting pre-inits here + if (SeqAnalysis.Loops[I].isRegularLoop()) { + const auto &TransformPreInit = + SeqAnalysis.Loops[TransformIndex++].TransformsPreInits; + if (!TransformPreInit.empty()) + llvm::append_range(PreInits, TransformPreInit); + } + + FinalLoops.push_back(SeqAnalysis.Loops[I].TheForStmt); + } + + FinalLoops.insert(FinalLoops.begin() + (FirstVal - 1), FusedForStmt); + FusionStmt = CompoundStmt::Create(Context, FinalLoops, FPOptionsOverride(), + SourceLocation(), SourceLocation()); + } + return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, + NumGeneratedTopLevelLoops, AStmt, FusionStmt, + buildPreInits(Context, PreInits)); +} + OMPClause *SemaOpenMP::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, SourceLocation StartLoc, @@ -16887,6 +17637,31 @@ OMPClause *SemaOpenMP::ActOnOpenMPPartialClause(Expr *FactorExpr, FactorExpr); } +OMPClause *SemaOpenMP::ActOnOpenMPLoopRangeClause( + Expr *First, Expr *Count, SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation FirstLoc, SourceLocation CountLoc, SourceLocation EndLoc) { + + // OpenMP [6.0, Restrictions] + // First and Count must be integer expressions with positive value + ExprResult FirstVal = + VerifyPositiveIntegerConstantInClause(First, OMPC_looprange); + if (FirstVal.isInvalid()) + First = nullptr; + + ExprResult CountVal = + VerifyPositiveIntegerConstantInClause(Count, OMPC_looprange); + if (CountVal.isInvalid()) + Count = nullptr; + + // OpenMP [6.0, Restrictions] + // first + count - 1 must not evaluate to a value greater than the + // loop sequence length of the associated canonical loop sequence. + // This check must be performed afterwards due to the delayed + // parsing and computation of the associated loop sequence + return OMPLoopRangeClause::Create(getASTContext(), StartLoc, LParenLoc, + FirstLoc, CountLoc, EndLoc, First, Count); +} + OMPClause *SemaOpenMP::ActOnOpenMPAlignClause(Expr *A, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 3ebbb30..2bf1511 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -7102,7 +7102,7 @@ ExprResult Sema::CheckTemplateArgument(NamedDecl *Param, QualType ParamType, // If the parameter type somehow involves auto, deduce the type now. 
DeducedType *DeducedT = ParamType->getContainedDeducedType(); - bool IsDeduced = DeducedT && !DeducedT->isDeduced(); + bool IsDeduced = DeducedT && DeducedT->getDeducedType().isNull(); if (IsDeduced) { // When checking a deduced template argument, deduce from its type even if // the type is dependent, in order to check the types of non-type template diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 1ff94d7..f1c9c5c 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -616,29 +616,30 @@ Sema::InstantiatingTemplate::InstantiatingTemplate( Invalid = true; return; } - Invalid = CheckInstantiationDepth(PointOfInstantiation, InstantiationRange); + + CodeSynthesisContext Inst; + Inst.Kind = Kind; + Inst.PointOfInstantiation = PointOfInstantiation; + Inst.Entity = Entity; + Inst.Template = Template; + Inst.TemplateArgs = TemplateArgs.data(); + Inst.NumTemplateArgs = TemplateArgs.size(); + Inst.DeductionInfo = DeductionInfo; + Inst.InstantiationRange = InstantiationRange; + Inst.InConstraintSubstitution = + Inst.Kind == CodeSynthesisContext::ConstraintSubstitution; + if (!SemaRef.CodeSynthesisContexts.empty()) + Inst.InConstraintSubstitution |= + SemaRef.CodeSynthesisContexts.back().InConstraintSubstitution; + + Invalid = SemaRef.pushCodeSynthesisContext(Inst); if (!Invalid) { - CodeSynthesisContext Inst; - Inst.Kind = Kind; - Inst.PointOfInstantiation = PointOfInstantiation; - Inst.Entity = Entity; - Inst.Template = Template; - Inst.TemplateArgs = TemplateArgs.data(); - Inst.NumTemplateArgs = TemplateArgs.size(); - Inst.DeductionInfo = DeductionInfo; - Inst.InstantiationRange = InstantiationRange; - Inst.InConstraintSubstitution = - Inst.Kind == CodeSynthesisContext::ConstraintSubstitution; - if (!SemaRef.CodeSynthesisContexts.empty()) - Inst.InConstraintSubstitution |= - SemaRef.CodeSynthesisContexts.back().InConstraintSubstitution; - - SemaRef.pushCodeSynthesisContext(Inst); - - AlreadyInstantiating = !Inst.Entity ? false : - !SemaRef.InstantiatingSpecializations - .insert({Inst.Entity->getCanonicalDecl(), Inst.Kind}) - .second; + AlreadyInstantiating = + !Inst.Entity + ? 
false + : !SemaRef.InstantiatingSpecializations + .insert({Inst.Entity->getCanonicalDecl(), Inst.Kind}) + .second; atTemplateBegin(SemaRef.TemplateInstCallbacks, SemaRef, Inst); } } @@ -834,18 +835,34 @@ Sema::InstantiatingTemplate::InstantiatingTemplate( : InstantiatingTemplate(SemaRef, CodeSynthesisContext::PartialOrderingTTP, ArgLoc, InstantiationRange, PArg) {} -void Sema::pushCodeSynthesisContext(CodeSynthesisContext Ctx) { +bool Sema::pushCodeSynthesisContext(CodeSynthesisContext Ctx) { Ctx.SavedInNonInstantiationSFINAEContext = InNonInstantiationSFINAEContext; InNonInstantiationSFINAEContext = false; - CodeSynthesisContexts.push_back(Ctx); - - if (!Ctx.isInstantiationRecord()) + if (!Ctx.isInstantiationRecord()) { ++NonInstantiationEntries; + } else { + assert(SemaRef.NonInstantiationEntries <= + SemaRef.CodeSynthesisContexts.size()); + if ((SemaRef.CodeSynthesisContexts.size() - + SemaRef.NonInstantiationEntries) > + SemaRef.getLangOpts().InstantiationDepth) { + SemaRef.Diag(Ctx.PointOfInstantiation, + diag::err_template_recursion_depth_exceeded) + << SemaRef.getLangOpts().InstantiationDepth << Ctx.InstantiationRange; + SemaRef.Diag(Ctx.PointOfInstantiation, + diag::note_template_recursion_depth) + << SemaRef.getLangOpts().InstantiationDepth; + return true; + } + } + + CodeSynthesisContexts.push_back(Ctx); // Check to see if we're low on stack space. We can't do anything about this // from here, but we can at least warn the user. StackHandler.warnOnStackNearlyExhausted(Ctx.PointOfInstantiation); + return false; } void Sema::popCodeSynthesisContext() { @@ -907,25 +924,6 @@ static std::string convertCallArgsToString(Sema &S, return Result; } -bool Sema::InstantiatingTemplate::CheckInstantiationDepth( - SourceLocation PointOfInstantiation, - SourceRange InstantiationRange) { - assert(SemaRef.NonInstantiationEntries <= - SemaRef.CodeSynthesisContexts.size()); - if ((SemaRef.CodeSynthesisContexts.size() - - SemaRef.NonInstantiationEntries) - <= SemaRef.getLangOpts().InstantiationDepth) - return false; - - SemaRef.Diag(PointOfInstantiation, - diag::err_template_recursion_depth_exceeded) - << SemaRef.getLangOpts().InstantiationDepth - << InstantiationRange; - SemaRef.Diag(PointOfInstantiation, diag::note_template_recursion_depth) - << SemaRef.getLangOpts().InstantiationDepth; - return true; -} - void Sema::PrintInstantiationStack(InstantiationContextDiagFuncRef DiagFunc) { // Determine which template instantiations to skip, if any. unsigned SkipStart = CodeSynthesisContexts.size(), SkipEnd = SkipStart; diff --git a/clang/lib/Sema/SemaTypeTraits.cpp b/clang/lib/Sema/SemaTypeTraits.cpp index c2427dcf..6c798d6 100644 --- a/clang/lib/Sema/SemaTypeTraits.cpp +++ b/clang/lib/Sema/SemaTypeTraits.cpp @@ -1163,13 +1163,16 @@ static bool EvaluateUnaryTypeTrait(Sema &Self, TypeTrait UTT, // - it has at least one trivial eligible constructor and a trivial, // non-deleted destructor. 
const CXXDestructorDecl *Dtor = RD->getDestructor(); - if (UnqualT->isAggregateType()) - if (Dtor && !Dtor->isUserProvided()) - return true; - if (RD->hasTrivialDestructor() && (!Dtor || !Dtor->isDeleted())) - if (RD->hasTrivialDefaultConstructor() || - RD->hasTrivialCopyConstructor() || RD->hasTrivialMoveConstructor()) - return true; + if (UnqualT->isAggregateType() && (!Dtor || !Dtor->isUserProvided())) + return true; + if (RD->hasTrivialDestructor() && (!Dtor || !Dtor->isDeleted())) { + for (CXXConstructorDecl *Ctr : RD->ctors()) { + if (Ctr->isIneligibleOrNotSelected() || Ctr->isDeleted()) + continue; + if (Ctr->isTrivial()) + return true; + } + } return false; } case UTT_IsIntangibleType: diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 0214078..6967301 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -1783,6 +1783,14 @@ public: LParenLoc, EndLoc); } + OMPClause * + RebuildOMPLoopRangeClause(Expr *First, Expr *Count, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation FirstLoc, + SourceLocation CountLoc, SourceLocation EndLoc) { + return getSema().OpenMP().ActOnOpenMPLoopRangeClause( + First, Count, StartLoc, LParenLoc, FirstLoc, CountLoc, EndLoc); + } + /// Build a new OpenMP 'allocator' clause. /// /// By default, performs semantic analysis to build the new OpenMP clause. @@ -9609,6 +9617,17 @@ StmtResult TreeTransform<Derived>::TransformOMPInterchangeDirective( template <typename Derived> StmtResult +TreeTransform<Derived>::TransformOMPFuseDirective(OMPFuseDirective *D) { + DeclarationNameInfo DirName; + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + D->getDirectiveKind(), DirName, nullptr, D->getBeginLoc()); + StmtResult Res = getDerived().TransformOMPExecutableDirective(D); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); + return Res; +} + +template <typename Derived> +StmtResult TreeTransform<Derived>::TransformOMPForDirective(OMPForDirective *D) { DeclarationNameInfo DirName; getDerived().getSema().OpenMP().StartOpenMPDSABlock( @@ -10502,6 +10521,31 @@ TreeTransform<Derived>::TransformOMPPartialClause(OMPPartialClause *C) { template <typename Derived> OMPClause * +TreeTransform<Derived>::TransformOMPLoopRangeClause(OMPLoopRangeClause *C) { + ExprResult F = getDerived().TransformExpr(C->getFirst()); + if (F.isInvalid()) + return nullptr; + + ExprResult Cn = getDerived().TransformExpr(C->getCount()); + if (Cn.isInvalid()) + return nullptr; + + Expr *First = F.get(); + Expr *Count = Cn.get(); + + bool Changed = (First != C->getFirst()) || (Count != C->getCount()); + + // If no changes and AlwaysRebuild() is false, return the original clause + if (!Changed && !getDerived().AlwaysRebuild()) + return C; + + return RebuildOMPLoopRangeClause(First, Count, C->getBeginLoc(), + C->getLParenLoc(), C->getFirstLoc(), + C->getCountLoc(), C->getEndLoc()); +} + +template <typename Derived> +OMPClause * TreeTransform<Derived>::TransformOMPCollapseClause(OMPCollapseClause *C) { ExprResult E = getDerived().TransformExpr(C->getNumForLoops()); if (E.isInvalid()) diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 9ee8a0f..c05e428 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11215,6 +11215,9 @@ OMPClause *OMPClauseReader::readClause() { case llvm::omp::OMPC_partial: C = OMPPartialClause::CreateEmpty(Context); break; + case llvm::omp::OMPC_looprange: + C = OMPLoopRangeClause::CreateEmpty(Context); 
+ break; case llvm::omp::OMPC_allocator: C = new (Context) OMPAllocatorClause(); break; @@ -11618,6 +11621,14 @@ void OMPClauseReader::VisitOMPPartialClause(OMPPartialClause *C) { C->setLParenLoc(Record.readSourceLocation()); } +void OMPClauseReader::VisitOMPLoopRangeClause(OMPLoopRangeClause *C) { + C->setFirst(Record.readSubExpr()); + C->setCount(Record.readSubExpr()); + C->setLParenLoc(Record.readSourceLocation()); + C->setFirstLoc(Record.readSourceLocation()); + C->setCountLoc(Record.readSourceLocation()); +} + void OMPClauseReader::VisitOMPAllocatorClause(OMPAllocatorClause *C) { C->setAllocator(Record.readExpr()); C->setLParenLoc(Record.readSourceLocation()); diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 213c2c2..70b898a 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2469,10 +2469,21 @@ void ASTStmtReader::VisitOMPReverseDirective(OMPReverseDirective *D) { VisitOMPCanonicalLoopNestTransformationDirective(D); } +void ASTStmtReader::VisitOMPCanonicalLoopSequenceTransformationDirective( + OMPCanonicalLoopSequenceTransformationDirective *D) { + VisitStmt(D); + VisitOMPExecutableDirective(D); + D->setNumGeneratedTopLevelLoops(Record.readUInt32()); +} + void ASTStmtReader::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) { VisitOMPCanonicalLoopNestTransformationDirective(D); } +void ASTStmtReader::VisitOMPFuseDirective(OMPFuseDirective *D) { + VisitOMPCanonicalLoopSequenceTransformationDirective(D); +} + void ASTStmtReader::VisitOMPForDirective(OMPForDirective *D) { VisitOMPLoopDirective(D); D->setHasCancel(Record.readBool()); @@ -3615,6 +3626,12 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { break; } + case STMT_OMP_FUSE_DIRECTIVE: { + unsigned NumClauses = Record[ASTStmtReader::NumStmtFields]; + S = OMPFuseDirective::CreateEmpty(Context, NumClauses); + break; + } + case STMT_OMP_INTERCHANGE_DIRECTIVE: { unsigned NumLoops = Record[ASTStmtReader::NumStmtFields]; unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1]; diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 09859da..cdf95ba 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -7882,6 +7882,14 @@ void OMPClauseWriter::VisitOMPPartialClause(OMPPartialClause *C) { Record.AddSourceLocation(C->getLParenLoc()); } +void OMPClauseWriter::VisitOMPLoopRangeClause(OMPLoopRangeClause *C) { + Record.AddStmt(C->getFirst()); + Record.AddStmt(C->getCount()); + Record.AddSourceLocation(C->getLParenLoc()); + Record.AddSourceLocation(C->getFirstLoc()); + Record.AddSourceLocation(C->getCountLoc()); +} + void OMPClauseWriter::VisitOMPAllocatorClause(OMPAllocatorClause *C) { Record.AddStmt(C->getAllocator()); Record.AddSourceLocation(C->getLParenLoc()); diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index 21c04dd..ebda91e 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2487,6 +2487,18 @@ void ASTStmtWriter::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) { Code = serialization::STMT_OMP_INTERCHANGE_DIRECTIVE; } +void ASTStmtWriter::VisitOMPCanonicalLoopSequenceTransformationDirective( + OMPCanonicalLoopSequenceTransformationDirective *D) { + VisitStmt(D); + VisitOMPExecutableDirective(D); + Record.writeUInt32(D->getNumGeneratedTopLevelLoops()); +} + +void 
ASTStmtWriter::VisitOMPFuseDirective(OMPFuseDirective *D) { + VisitOMPCanonicalLoopSequenceTransformationDirective(D); + Code = serialization::STMT_OMP_FUSE_DIRECTIVE; +} + void ASTStmtWriter::VisitOMPForDirective(OMPForDirective *D) { VisitOMPLoopDirective(D); Record.writeBool(D->hasCancel()); diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp index 36f316d..0ae784c 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -672,6 +672,10 @@ ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C, ProgramStateRef stateTrue, stateFalse; + if (!First.Expression->getType()->isAnyPointerType() || + !Second.Expression->getType()->isAnyPointerType()) + return state; + // Assume different address spaces cannot overlap. if (First.Expression->getType()->getPointeeType().getAddressSpace() != Second.Expression->getType()->getPointeeType().getAddressSpace()) diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 785cdfa..4e472b7 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -1814,6 +1814,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::OMPStripeDirectiveClass: case Stmt::OMPTileDirectiveClass: case Stmt::OMPInterchangeDirectiveClass: + case Stmt::OMPFuseDirectiveClass: case Stmt::OMPInteropDirectiveClass: case Stmt::OMPDispatchDirectiveClass: case Stmt::OMPMaskedDirectiveClass: diff --git a/clang/test/Analysis/buffer-overlap-decls.c b/clang/test/Analysis/buffer-overlap-decls.c new file mode 100644 index 0000000..4830f4e --- /dev/null +++ b/clang/test/Analysis/buffer-overlap-decls.c @@ -0,0 +1,23 @@ +// RUN: %clang_analyze_cc1 -verify %s -Wno-incompatible-library-redeclaration \ +// RUN: -analyzer-checker=alpha.unix.cstring.BufferOverlap +// expected-no-diagnostics + +typedef typeof(sizeof(int)) size_t; + +void memcpy(int dst, int src, size_t size); + +void test_memcpy_proxy() { + memcpy(42, 42, 42); // no-crash +} + +void strcpy(int dst, char *src); + +void test_strcpy_proxy() { + strcpy(42, (char *)42); // no-crash +} + +void strxfrm(int dst, char *src, size_t size); + +void test_strxfrm_proxy() { + strxfrm(42, (char *)42, 42); // no-crash +} diff --git a/clang/test/Analysis/buffer-overlap.c b/clang/test/Analysis/buffer-overlap.c index 8414a76..defb17a 100644 --- a/clang/test/Analysis/buffer-overlap.c +++ b/clang/test/Analysis/buffer-overlap.c @@ -96,3 +96,10 @@ void test_snprintf6() { char b[4] = {0}; snprintf(a, sizeof(a), "%s", b); // no-warning } + +void* memcpy(void* dest, const void* src, size_t count); + +void test_memcpy_esoteric() { +label: + memcpy((char *)&&label, (const char *)memcpy, 1); +} diff --git a/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-file-diagnostics.c.sarif b/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-file-diagnostics.c.sarif index 85e710f..501d27c 100644 --- a/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-file-diagnostics.c.sarif +++ b/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-file-diagnostics.c.sarif @@ -141,4 +141,4 @@ } ], "version": "[SARIF version]" -}
\ No newline at end of file +} diff --git a/clang/test/Analysis/lit.local.cfg b/clang/test/Analysis/lit.local.cfg index 3d60a16..03ab418 100644 --- a/clang/test/Analysis/lit.local.cfg +++ b/clang/test/Analysis/lit.local.cfg @@ -17,11 +17,13 @@ config.substitutions.append( ) ) +sed_cmd = "/opt/freeware/bin/sed" if "system-aix" in config.available_features else "sed" + # Filtering command for testing SARIF output against reference output. config.substitutions.append( ( "%normalize_sarif", - "sed -r '%s;%s;%s;%s'" + f"{sed_cmd} -r '%s;%s;%s;%s'" % ( # Replace version strings that are likely to change. r's/"version": ".* version .*"/"version": "[clang version]"/', diff --git a/clang/test/CIR/CodeGen/complex.cpp b/clang/test/CIR/CodeGen/complex.cpp index e901631..4c396d3 100644 --- a/clang/test/CIR/CodeGen/complex.cpp +++ b/clang/test/CIR/CodeGen/complex.cpp @@ -1270,3 +1270,40 @@ void real_on_scalar_from_real_with_type_promotion() { // OGCG: %[[A_REAL_F32:.*]] = fpext half %[[A_REAL]] to float // OGCG: %[[A_REAL_F16:.*]] = fptrunc float %[[A_REAL_F32]] to half // OGCG: store half %[[A_REAL_F16]], ptr %[[B_ADDR]], align 2 + +void real_on_scalar_from_imag_with_type_promotion() { + _Float16 _Complex a; + _Float16 b = __real__(__imag__ a); +} + +// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.f16>, !cir.ptr<!cir.complex<!cir.f16>>, ["a"] +// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["b", init] +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.f16>>, !cir.complex<!cir.f16> +// CIR: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16 +// CIR: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16 +// CIR: %[[A_REAL_F32:.*]] = cir.cast(floating, %[[A_REAL]] : !cir.f16), !cir.float +// CIR: %[[A_IMAG_F32:.*]] = cir.cast(floating, %[[A_IMAG]] : !cir.f16), !cir.float +// CIR: %[[A_COMPLEX_F32:.*]] = cir.complex.create %[[A_REAL_F32]], %[[A_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float> +// CIR: %[[A_IMAG_F32:.*]] = cir.complex.imag %[[A_COMPLEX_F32]] : !cir.complex<!cir.float> -> !cir.float +// CIR: %[[A_IMAG_F16:.*]] = cir.cast(floating, %[[A_IMAG_F32]] : !cir.float), !cir.f16 +// CIR: cir.store{{.*}} %[[A_IMAG_F16]], %[[B_ADDR]] : !cir.f16, !cir.ptr<!cir.f16> + +// LLVM: %[[A_ADDR:.*]] = alloca { half, half }, i64 1, align 2 +// LLVM: %[[B_ADDR]] = alloca half, i64 1, align 2 +// LLVM: %[[TMP_A:.*]] = load { half, half }, ptr %[[A_ADDR]], align 2 +// LLVM: %[[A_REAL:.*]] = extractvalue { half, half } %[[TMP_A]], 0 +// LLVM: %[[A_IMAG:.*]] = extractvalue { half, half } %[[TMP_A]], 1 +// LLVM: %[[A_REAL_F32:.*]] = fpext half %[[A_REAL]] to float +// LLVM: %[[A_IMAG_F32:.*]] = fpext half %[[A_IMAG]] to float +// LLVM: %[[TMP_A_COMPLEX_F32:.*]] = insertvalue { float, float } {{.*}}, float %[[A_REAL_F32]], 0 +// LLVM: %[[A_COMPLEX_F32:.*]] = insertvalue { float, float } %[[TMP_A_COMPLEX_F32]], float %[[A_IMAG_F32]], 1 +// LLVM: %[[A_IMAG_F16:.*]] = fptrunc float %[[A_IMAG_F32]] to half +// LLVM: store half %[[A_IMAG_F16]], ptr %[[B_ADDR]], align 2 + +// OGCG: %[[A_ADDR:.*]] = alloca { half, half }, align 2 +// OGCG: %[[B_ADDR:.*]] = alloca half, align 2 +// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { half, half }, ptr %[[A_ADDR]], i32 0, i32 1 +// OGCG: %[[A_IMAG:.*]] = load half, ptr %[[A_IMAG_PTR]], align 2 +// OGCG: %[[A_IMAG_F32:.*]] = fpext half %[[A_IMAG]] to float +// OGCG: %[[A_IMAG_F16:.*]] = fptrunc float %[[A_IMAG_F32]] to half +// OGCG: store half %[[A_IMAG_F16]], 
ptr %[[B_ADDR]], align 2 diff --git a/clang/test/CIR/CodeGen/delete.cpp b/clang/test/CIR/CodeGen/delete.cpp new file mode 100644 index 0000000..f21d203 --- /dev/null +++ b/clang/test/CIR/CodeGen/delete.cpp @@ -0,0 +1,88 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -mconstructor-aliases -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -mconstructor-aliases -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -mconstructor-aliases -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +typedef __typeof(sizeof(int)) size_t; + +struct SizedDelete { + void operator delete(void*, size_t); + int member; +}; +void test_sized_delete(SizedDelete *x) { + delete x; +} + +// SizedDelete::operator delete(void*, unsigned long) +// CIR: cir.func private @_ZN11SizedDeletedlEPvm(!cir.ptr<!void>, !u64i) +// LLVM: declare void @_ZN11SizedDeletedlEPvm(ptr, i64) + +// CIR: cir.func dso_local @_Z17test_sized_deleteP11SizedDelete +// CIR: %[[X:.*]] = cir.load{{.*}} %{{.*}} +// CIR: %[[X_CAST:.*]] = cir.cast(bitcast, %[[X]] : !cir.ptr<!rec_SizedDelete>), !cir.ptr<!void> +// CIR: %[[OBJ_SIZE:.*]] = cir.const #cir.int<4> : !u64i +// CIR: cir.call @_ZN11SizedDeletedlEPvm(%[[X_CAST]], %[[OBJ_SIZE]]) nothrow : (!cir.ptr<!void>, !u64i) -> () + +// LLVM: define dso_local void @_Z17test_sized_deleteP11SizedDelete +// LLVM: %[[X:.*]] = load ptr, ptr %{{.*}} +// LLVM: call void @_ZN11SizedDeletedlEPvm(ptr %[[X]], i64 4) + +// OGCG: define dso_local void @_Z17test_sized_deleteP11SizedDelete +// OGCG: %[[X:.*]] = load ptr, ptr %{{.*}} +// OGCG: %[[ISNULL:.*]] = icmp eq ptr %[[X]], null +// OGCG: br i1 %[[ISNULL]], label %{{.*}}, label %[[DELETE_NOTNULL:.*]] +// OGCG: [[DELETE_NOTNULL]]: +// OGCG: call void @_ZN11SizedDeletedlEPvm(ptr noundef %[[X]], i64 noundef 4) + +// This function is declared below the call in OGCG. 
+// OGCG: declare void @_ZN11SizedDeletedlEPvm(ptr noundef, i64 noundef) + +struct Contents { + ~Contents() {} +}; +struct Container { + Contents *contents; + ~Container(); +}; +Container::~Container() { delete contents; } + +// Contents::~Contents() +// CIR: cir.func comdat linkonce_odr @_ZN8ContentsD2Ev +// LLVM: define linkonce_odr void @_ZN8ContentsD2Ev + +// operator delete(void*, unsigned long) +// CIR: cir.func private @_ZdlPvm(!cir.ptr<!void>, !u64i) +// LLVM: declare void @_ZdlPvm(ptr, i64) + +// Container::~Container() +// CIR: cir.func dso_local @_ZN9ContainerD2Ev +// CIR: %[[THIS:.*]] = cir.load %{{.*}} +// CIR: %[[CONTENTS_PTR_ADDR:.*]] = cir.get_member %[[THIS]][0] {name = "contents"} : !cir.ptr<!rec_Container> -> !cir.ptr<!cir.ptr<!rec_Contents>> +// CIR: %[[CONTENTS_PTR:.*]] = cir.load{{.*}} %[[CONTENTS_PTR_ADDR]] +// CIR: cir.call @_ZN8ContentsD2Ev(%[[CONTENTS_PTR]]) nothrow : (!cir.ptr<!rec_Contents>) -> () +// CIR: %[[CONTENTS_CAST:.*]] = cir.cast(bitcast, %[[CONTENTS_PTR]] : !cir.ptr<!rec_Contents>), !cir.ptr<!void> +// CIR: %[[OBJ_SIZE:.*]] = cir.const #cir.int<1> : !u64i +// CIR: cir.call @_ZdlPvm(%[[CONTENTS_CAST]], %[[OBJ_SIZE]]) nothrow : (!cir.ptr<!void>, !u64i) -> () + +// LLVM: define dso_local void @_ZN9ContainerD2Ev +// LLVM: %[[THIS:.*]] = load ptr, ptr %{{.*}} +// LLVM: %[[CONTENTS_PTR_ADDR:.*]] = getelementptr %struct.Container, ptr %[[THIS]], i32 0, i32 0 +// LLVM: %[[CONTENTS_PTR:.*]] = load ptr, ptr %[[CONTENTS_PTR_ADDR]] +// LLVM: call void @_ZN8ContentsD2Ev(ptr %[[CONTENTS_PTR]]) +// LLVM: call void @_ZdlPvm(ptr %[[CONTENTS_PTR]], i64 1) + +// OGCG: define dso_local void @_ZN9ContainerD2Ev +// OGCG: %[[THIS:.*]] = load ptr, ptr %{{.*}} +// OGCG: %[[CONTENTS:.*]] = getelementptr inbounds nuw %struct.Container, ptr %[[THIS]], i32 0, i32 0 +// OGCG: %[[CONTENTS_PTR:.*]] = load ptr, ptr %[[CONTENTS]] +// OGCG: %[[ISNULL:.*]] = icmp eq ptr %[[CONTENTS_PTR]], null +// OGCG: br i1 %[[ISNULL]], label %{{.*}}, label %[[DELETE_NOTNULL:.*]] +// OGCG: [[DELETE_NOTNULL]]: +// OGCG: call void @_ZN8ContentsD2Ev(ptr noundef nonnull align 1 dereferenceable(1) %[[CONTENTS_PTR]]) +// OGCG: call void @_ZdlPvm(ptr noundef %[[CONTENTS_PTR]], i64 noundef 1) + +// These functions are declared/defined below the calls in OGCG. 
+// OGCG: define linkonce_odr void @_ZN8ContentsD2Ev +// OGCG: declare void @_ZdlPvm(ptr noundef, i64 noundef) diff --git a/clang/test/CIR/CodeGen/lang-c-cpp.cpp b/clang/test/CIR/CodeGen/lang-c-cpp.cpp index e126932..8931783 100644 --- a/clang/test/CIR/CodeGen/lang-c-cpp.cpp +++ b/clang/test/CIR/CodeGen/lang-c-cpp.cpp @@ -3,8 +3,8 @@ // RUN: %clang_cc1 -x c -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.c.cir // RUN: FileCheck --check-prefix=CIR-C --input-file=%t.c.cir %s -// CIR-CPP: module attributes {{{.*}}cir.lang = #cir.lang<cxx>{{.*}}} -// CIR-C: module attributes {{{.*}}cir.lang = #cir.lang<c>{{.*}}} +// CIR-CPP: module{{.*}} attributes {{{.*}}cir.lang = #cir.lang<cxx>{{.*}}} +// CIR-C: module{{.*}} attributes {{{.*}}cir.lang = #cir.lang<c>{{.*}}} int main() { return 0; diff --git a/clang/test/CIR/CodeGen/module-filename.cpp b/clang/test/CIR/CodeGen/module-filename.cpp new file mode 100644 index 0000000..05e2e92 --- /dev/null +++ b/clang/test/CIR/CodeGen/module-filename.cpp @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// Normally, we try to avoid checking the filename of a test, but that's the +// entire point of this test, so we use a wildcard for the path but check the +// filename. +// CIR: module @"{{.*}}module-filename.cpp" + +int main() { + return 0; +} diff --git a/clang/test/CIR/CodeGen/opt-info-attr.cpp b/clang/test/CIR/CodeGen/opt-info-attr.cpp index 444286b..97071d7 100644 --- a/clang/test/CIR/CodeGen/opt-info-attr.cpp +++ b/clang/test/CIR/CodeGen/opt-info-attr.cpp @@ -13,10 +13,10 @@ void f() {} -// CHECK-O0: module attributes +// CHECK-O0: module{{.*}} attributes // CHECK-O0-NOT: cir.opt_info -// CHECK-O1: module attributes {{.+}}cir.opt_info = #cir.opt_info<level = 1, size = 0>{{.+}} -// CHECK-O2: module attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 0>{{.+}} -// CHECK-O3: module attributes {{.+}}cir.opt_info = #cir.opt_info<level = 3, size = 0>{{.+}} -// CHECK-Os: module attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 1>{{.+}} -// CHECK-Oz: module attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 2>{{.+}} +// CHECK-O1: module{{.*}} attributes {{.+}}cir.opt_info = #cir.opt_info<level = 1, size = 0>{{.+}} +// CHECK-O2: module{{.*}} attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 0>{{.+}} +// CHECK-O3: module{{.*}} attributes {{.+}}cir.opt_info = #cir.opt_info<level = 3, size = 0>{{.+}} +// CHECK-Os: module{{.*}} attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 1>{{.+}} +// CHECK-Oz: module{{.*}} attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 2>{{.+}} diff --git a/clang/test/CIR/CodeGen/vbase.cpp b/clang/test/CIR/CodeGen/vbase.cpp index 9139651..4d57f8e 100644 --- a/clang/test/CIR/CodeGen/vbase.cpp +++ b/clang/test/CIR/CodeGen/vbase.cpp @@ -13,19 +13,29 @@ public: class Derived : public virtual Base {}; -// This is just here to force the record types to be emitted. 
void f() { Derived d; + d.f(); +} + +class DerivedFinal final : public virtual Base {}; + +void g() { + DerivedFinal df; + df.f(); } // CIR: !rec_Base = !cir.record<class "Base" {!cir.vptr}> // CIR: !rec_Derived = !cir.record<class "Derived" {!rec_Base}> +// CIR: !rec_DerivedFinal = !cir.record<class "DerivedFinal" {!rec_Base}> // LLVM: %class.Derived = type { %class.Base } // LLVM: %class.Base = type { ptr } +// LLVM: %class.DerivedFinal = type { %class.Base } // OGCG: %class.Derived = type { %class.Base } // OGCG: %class.Base = type { ptr } +// OGCG: %class.DerivedFinal = type { %class.Base } // Test the constructor handling for a class with a virtual base. struct A { @@ -47,6 +57,76 @@ void ppp() { B b; } // OGCG: @_ZTV1B = linkonce_odr unnamed_addr constant { [3 x ptr] } { [3 x ptr] [ptr inttoptr (i64 12 to ptr), ptr null, ptr @_ZTI1B] }, comdat, align 8 +// CIR: cir.func {{.*}}@_Z1fv() { +// CIR: %[[D:.+]] = cir.alloca !rec_Derived, !cir.ptr<!rec_Derived>, ["d", init] +// CIR: cir.call @_ZN7DerivedC1Ev(%[[D]]) nothrow : (!cir.ptr<!rec_Derived>) -> () +// CIR: %[[VPTR_PTR:.+]] = cir.vtable.get_vptr %[[D]] : !cir.ptr<!rec_Derived> -> !cir.ptr<!cir.vptr> +// CIR: %[[VPTR:.+]] = cir.load {{.*}} %[[VPTR_PTR]] : !cir.ptr<!cir.vptr>, !cir.vptr +// CIR: %[[VPTR_I8:.+]] = cir.cast(bitcast, %[[VPTR]] : !cir.vptr), !cir.ptr<!u8i> +// CIR: %[[NEG32:.+]] = cir.const #cir.int<-32> : !s64i +// CIR: %[[ADJ_VPTR_I8:.+]] = cir.ptr_stride(%[[VPTR_I8]] : !cir.ptr<!u8i>, %[[NEG32]] : !s64i), !cir.ptr<!u8i> +// CIR: %[[OFFSET_PTR:.+]] = cir.cast(bitcast, %[[ADJ_VPTR_I8]] : !cir.ptr<!u8i>), !cir.ptr<!s64i> +// CIR: %[[OFFSET:.+]] = cir.load {{.*}} %[[OFFSET_PTR]] : !cir.ptr<!s64i>, !s64i +// CIR: %[[D_I8:.+]] = cir.cast(bitcast, %[[D]] : !cir.ptr<!rec_Derived>), !cir.ptr<!u8i> +// CIR: %[[ADJ_THIS_I8:.+]] = cir.ptr_stride(%[[D_I8]] : !cir.ptr<!u8i>, %[[OFFSET]] : !s64i), !cir.ptr<!u8i> +// CIR: %[[ADJ_THIS_D:.+]] = cir.cast(bitcast, %[[ADJ_THIS_I8]] : !cir.ptr<!u8i>), !cir.ptr<!rec_Derived> +// CIR: %[[BASE_THIS:.+]] = cir.cast(bitcast, %[[ADJ_THIS_D]] : !cir.ptr<!rec_Derived>), !cir.ptr<!rec_Base> +// CIR: %[[BASE_VPTR_PTR:.+]] = cir.vtable.get_vptr %[[BASE_THIS]] : !cir.ptr<!rec_Base> -> !cir.ptr<!cir.vptr> +// CIR: %[[BASE_VPTR:.+]] = cir.load {{.*}} %[[BASE_VPTR_PTR]] : !cir.ptr<!cir.vptr>, !cir.vptr +// CIR: %[[SLOT_PTR:.+]] = cir.vtable.get_virtual_fn_addr %[[BASE_VPTR]][0] : !cir.vptr -> !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>> +// CIR: %[[FN:.+]] = cir.load {{.*}} %[[SLOT_PTR]] : !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>>, !cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>> +// CIR: cir.call %[[FN]](%[[BASE_THIS]]) : (!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>, !cir.ptr<!rec_Base>) -> () +// CIR: cir.return + +// CIR: cir.func {{.*}}@_Z1gv() { +// CIR: %[[DF:.+]] = cir.alloca !rec_DerivedFinal, !cir.ptr<!rec_DerivedFinal>, ["df", init] +// CIR: cir.call @_ZN12DerivedFinalC1Ev(%[[DF]]) nothrow : (!cir.ptr<!rec_DerivedFinal>) -> () +// CIR: %[[BASE_THIS_2:.+]] = cir.base_class_addr %[[DF]] : !cir.ptr<!rec_DerivedFinal> nonnull [0] -> !cir.ptr<!rec_Base> +// CIR: %[[BASE_VPTR_PTR_2:.+]] = cir.vtable.get_vptr %[[BASE_THIS_2]] : !cir.ptr<!rec_Base> -> !cir.ptr<!cir.vptr> +// CIR: %[[BASE_VPTR_2:.+]] = cir.load {{.*}} %[[BASE_VPTR_PTR_2]] : !cir.ptr<!cir.vptr>, !cir.vptr +// CIR: %[[SLOT_PTR_2:.+]] = cir.vtable.get_virtual_fn_addr %[[BASE_VPTR_2]][0] : !cir.vptr -> !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>> +// CIR: %[[FN_2:.+]] = cir.load {{.*}} %[[SLOT_PTR_2]] : 
!cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>>, !cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>> +// CIR: cir.call %[[FN_2]](%[[BASE_THIS_2]]) : (!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>, !cir.ptr<!rec_Base>) -> () +// CIR: cir.return + +// LLVM: define {{.*}}void @_Z1fv() +// LLVM: %[[D:.+]] = alloca {{.*}} +// LLVM: call void @_ZN7DerivedC1Ev(ptr %[[D]]) +// LLVM: %[[VPTR_ADDR:.+]] = load ptr, ptr %[[D]] +// LLVM: %[[NEG32_PTR:.+]] = getelementptr i8, ptr %[[VPTR_ADDR]], i64 -32 +// LLVM: %[[OFF:.+]] = load i64, ptr %[[NEG32_PTR]] +// LLVM: %[[ADJ_THIS:.+]] = getelementptr i8, ptr %[[D]], i64 %[[OFF]] +// LLVM: %[[VFN_TAB:.+]] = load ptr, ptr %[[ADJ_THIS]] +// LLVM: %[[SLOT0:.+]] = getelementptr inbounds ptr, ptr %[[VFN_TAB]], i32 0 +// LLVM: %[[VFN:.+]] = load ptr, ptr %[[SLOT0]] +// LLVM: call void %[[VFN]](ptr %[[ADJ_THIS]]) +// LLVM: ret void + +// LLVM: define {{.*}}void @_Z1gv() +// LLVM: %[[DF:.+]] = alloca {{.*}} +// LLVM: call void @_ZN12DerivedFinalC1Ev(ptr %[[DF]]) +// LLVM: %[[VPTR2:.+]] = load ptr, ptr %[[DF]] +// LLVM: %[[SLOT0_2:.+]] = getelementptr inbounds ptr, ptr %[[VPTR2]], i32 0 +// LLVM: %[[VFN2:.+]] = load ptr, ptr %[[SLOT0_2]] +// LLVM: call void %[[VFN2]](ptr %[[DF]]) +// LLVM: ret void + +// OGCG: define {{.*}}void @_Z1fv() +// OGCG: %[[D:.+]] = alloca {{.*}} +// OGCG: call void @_ZN7DerivedC1Ev(ptr {{.*}} %[[D]]) +// OGCG: %[[VTABLE:.+]] = load ptr, ptr %[[D]] +// OGCG: %[[NEG32_PTR:.+]] = getelementptr i8, ptr %[[VTABLE]], i64 -32 +// OGCG: %[[OFF:.+]] = load i64, ptr %[[NEG32_PTR]] +// OGCG: %[[ADJ_THIS:.+]] = getelementptr inbounds i8, ptr %[[D]], i64 %[[OFF]] +// OGCG: call void @_ZN4Base1fEv(ptr {{.*}} %[[ADJ_THIS]]) +// OGCG: ret void + +// OGCG: define {{.*}}void @_Z1gv() +// OGCG: %[[DF:.+]] = alloca {{.*}} +// OGCG: call void @_ZN12DerivedFinalC1Ev(ptr {{.*}} %[[DF]]) +// OGCG: call void @_ZN4Base1fEv(ptr {{.*}} %[[DF]]) +// OGCG: ret void + // Constructor for B // CIR: cir.func comdat linkonce_odr @_ZN1BC1Ev(%arg0: !cir.ptr<!rec_B> // CIR: %[[THIS_ADDR:.*]] = cir.alloca !cir.ptr<!rec_B>, !cir.ptr<!cir.ptr<!rec_B>>, ["this", init] diff --git a/clang/test/CIR/CodeGen/vector-ext.cpp b/clang/test/CIR/CodeGen/vector-ext.cpp index 8b5379a..8bca48d 100644 --- a/clang/test/CIR/CodeGen/vector-ext.cpp +++ b/clang/test/CIR/CodeGen/vector-ext.cpp @@ -1322,3 +1322,23 @@ void logical_not() { // OGCG: %[[RESULT:.*]] = icmp eq <4 x i32> %[[TMP_A]], zeroinitializer // OGCG: %[[RESULT_VI4:.*]] = sext <4 x i1> %[[RESULT]] to <4 x i32> // OGCG: store <4 x i32> %[[RESULT_VI4]], ptr %[[B_ADDR]], align 16 + +void unary_extension() { + vi4 a; + vi4 b = __extension__ a; +} + +// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"] +// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b", init] +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CIR: cir.store{{.*}} %[[TMP_A]], %[[B_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>> + +// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16 +// LLVM: %[[B_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16 +// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16 +// LLVM: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], align 16 + +// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16 +// OGCG: %[[B_ADDR:.*]] = alloca <4 x i32>, align 16 +// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16 +// OGCG: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], 
align 16 diff --git a/clang/test/CIR/CodeGen/vector.cpp b/clang/test/CIR/CodeGen/vector.cpp index d8fdeea..f242779 100644 --- a/clang/test/CIR/CodeGen/vector.cpp +++ b/clang/test/CIR/CodeGen/vector.cpp @@ -1390,3 +1390,23 @@ void logical_not_float() { // OGCG: %[[RESULT:.*]] = fcmp oeq <4 x float> %[[TMP_A]], zeroinitializer // OGCG: %[[RESULT_VI4:.*]] = sext <4 x i1> %[[RESULT]] to <4 x i32> // OGCG: store <4 x i32> %[[RESULT_VI4]], ptr %[[B_ADDR]], align 16 + +void unary_extension() { + vi4 a; + vi4 b = __extension__ a; +} + +// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"] +// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b", init] +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CIR: cir.store{{.*}} %[[TMP_A]], %[[B_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>> + +// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16 +// LLVM: %[[B_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16 +// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16 +// LLVM: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], align 16 + +// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16 +// OGCG: %[[B_ADDR:.*]] = alloca <4 x i32>, align 16 +// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16 +// OGCG: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], align 16 diff --git a/clang/test/CIR/IR/global-init.cir b/clang/test/CIR/IR/global-init.cir new file mode 100644 index 0000000..727c067 --- /dev/null +++ b/clang/test/CIR/IR/global-init.cir @@ -0,0 +1,48 @@ +// RUN: cir-opt --verify-roundtrip %s -o - | FileCheck %s + +!u8i = !cir.int<u, 8> + +!rec_NeedsCtor = !cir.record<struct "NeedsCtor" padded {!u8i}> +!rec_NeedsDtor = !cir.record<struct "NeedsDtor" padded {!u8i}> +!rec_NeedsCtorDtor = !cir.record<struct "NeedsCtorDtor" padded {!u8i}> + +module attributes {cir.triple = "x86_64-unknown-linux-gnu"} { + cir.func private @_ZN9NeedsCtorC1Ev(!cir.ptr<!rec_NeedsCtor>) + cir.global external @needsCtor = ctor : !rec_NeedsCtor { + %0 = cir.get_global @needsCtor : !cir.ptr<!rec_NeedsCtor> + cir.call @_ZN9NeedsCtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtor>) -> () + } + // CHECK: cir.global external @needsCtor = ctor : !rec_NeedsCtor { + // CHECK: %0 = cir.get_global @needsCtor : !cir.ptr<!rec_NeedsCtor> + // CHECK: cir.call @_ZN9NeedsCtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtor>) -> () + // CHECK: } + + cir.func private @_ZN9NeedsDtorD1Ev(!cir.ptr<!rec_NeedsDtor>) + cir.global external dso_local @needsDtor = #cir.zero : !rec_NeedsDtor dtor { + %0 = cir.get_global @needsDtor : !cir.ptr<!rec_NeedsDtor> + cir.call @_ZN9NeedsDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsDtor>) -> () + } + // CHECK: cir.global external dso_local @needsDtor = #cir.zero : !rec_NeedsDtor dtor { + // CHECK: %0 = cir.get_global @needsDtor : !cir.ptr<!rec_NeedsDtor> + // CHECK: cir.call @_ZN9NeedsDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsDtor>) -> () + // CHECK: } + + cir.func private @_ZN13NeedsCtorDtorC1Ev(!cir.ptr<!rec_NeedsCtorDtor>) + cir.func private @_ZN13NeedsCtorDtorD1Ev(!cir.ptr<!rec_NeedsCtorDtor>) + cir.global external dso_local @needsCtorDtor = ctor : !rec_NeedsCtorDtor { + %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor> + cir.call @_ZN13NeedsCtorDtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> () + } dtor { + %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor> + cir.call @_ZN13NeedsCtorDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> () + 
} + // CHECK: cir.func private @_ZN13NeedsCtorDtorC1Ev(!cir.ptr<!rec_NeedsCtorDtor>) + // CHECK: cir.func private @_ZN13NeedsCtorDtorD1Ev(!cir.ptr<!rec_NeedsCtorDtor>) + // CHECK: cir.global external dso_local @needsCtorDtor = ctor : !rec_NeedsCtorDtor { + // CHECK: %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor> + // CHECK: cir.call @_ZN13NeedsCtorDtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> () + // CHECK: } dtor { + // CHECK: %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor> + // CHECK: cir.call @_ZN13NeedsCtorDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> () + // CHECK: } +} diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 347cd9e..3018bb97 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -985,18 +985,21 @@ double test_mm256_cvtsd_f64(__m256d __a) { // CHECK: extractelement <4 x double> %{{.*}}, i32 0 return _mm256_cvtsd_f64(__a); } +TEST_CONSTEXPR(_mm256_cvtsd_f64((__m256d){8.0, 7.0, 6.0, 5.0}) == 8.0); int test_mm256_cvtsi256_si32(__m256i __a) { // CHECK-LABEL: test_mm256_cvtsi256_si32 // CHECK: extractelement <8 x i32> %{{.*}}, i32 0 return _mm256_cvtsi256_si32(__a); } +TEST_CONSTEXPR(_mm256_cvtsi256_si32((__m256i)(__v8si){8, 7, 6, 5, 4, 3, 2, 1}) == 8); float test_mm256_cvtss_f32(__m256 __a) { // CHECK-LABEL: test_mm256_cvtss_f32 // CHECK: extractelement <8 x float> %{{.*}}, i32 0 return _mm256_cvtss_f32(__a); } +TEST_CONSTEXPR(_mm256_cvtss_f32((__m256){8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f}) == 8.0f); __m128i test_mm256_cvttpd_epi32(__m256d A) { // CHECK-LABEL: test_mm256_cvttpd_epi32 diff --git a/clang/test/CodeGen/X86/bmi-builtins.c b/clang/test/CodeGen/X86/bmi-builtins.c index ded40ca..d0ae0c7 100644 --- a/clang/test/CodeGen/X86/bmi-builtins.c +++ b/clang/test/CodeGen/X86/bmi-builtins.c @@ -1,7 +1,16 @@ -// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,TZCNT -// RUN: %clang_cc1 -x c -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT | FileCheck %s --check-prefix=TZCNT -// RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,TZCNT -// RUN: %clang_cc1 -x c++ -std=c++11 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT | FileCheck %s --check-prefix=TZCNT +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64,TZCNT,TZCNT64 +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,TZCNT +// RUN: %clang_cc1 -x c -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT | FileCheck %s --check-prefixes=TZCNT,TZCNT64 +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64,TZCNT,TZCNT64 +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall 
-Werror | FileCheck %s --check-prefixes=CHECK,TZCNT +// RUN: %clang_cc1 -x c++ -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT | FileCheck %s --check-prefixes=TZCNT,TZCNT64 + +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64,TZCNT,TZCNT64 +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,TZCNT +// RUN: %clang_cc1 -x c -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=TZCNT,TZCNT64 +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64,TZCNT,TZCNT64 +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,TZCNT +// RUN: %clang_cc1 -x c++ -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=TZCNT,TZCNT64 #include <immintrin.h> @@ -48,20 +57,20 @@ unsigned int test_tzcnt_u32(unsigned int __X) { #ifdef __x86_64__ unsigned long long test__tzcnt_u64(unsigned long long __X) { -// TZCNT-LABEL: test__tzcnt_u64 -// TZCNT: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) +// TZCNT64-LABEL: test__tzcnt_u64 +// TZCNT64: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) return __tzcnt_u64(__X); } long long test_mm_tzcnt_64(unsigned long long __X) { -// TZCNT-LABEL: test_mm_tzcnt_64 -// TZCNT: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) +// TZCNT64-LABEL: test_mm_tzcnt_64 +// TZCNT64: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) return _mm_tzcnt_64(__X); } unsigned long long test_tzcnt_u64(unsigned long long __X) { -// TZCNT-LABEL: test_tzcnt_u64 -// TZCNT: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) +// TZCNT64-LABEL: test_tzcnt_u64 +// TZCNT64: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) return _tzcnt_u64(__X); } #endif @@ -103,36 +112,36 @@ unsigned int test__blsr_u32(unsigned int __X) { #ifdef __x86_64__ unsigned long long test__andn_u64(unsigned long __X, unsigned long __Y) { -// CHECK-LABEL: test__andn_u64 -// CHECK: xor i64 %{{.*}}, -1 -// CHECK: and i64 %{{.*}}, %{{.*}} +// X64-LABEL: test__andn_u64 +// X64: xor i64 %{{.*}}, -1 +// X64: and i64 %{{.*}}, %{{.*}} return __andn_u64(__X, __Y); } unsigned long long test__bextr_u64(unsigned long __X, unsigned long __Y) { -// CHECK-LABEL: test__bextr_u64 -// CHECK: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}}) +// X64-LABEL: test__bextr_u64 +// X64: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}}) return __bextr_u64(__X, __Y); } unsigned long long test__blsi_u64(unsigned long long __X) { -// CHECK-LABEL: test__blsi_u64 -// CHECK: sub i64 0, %{{.*}} -// CHECK: and i64 %{{.*}}, %{{.*}} +// X64-LABEL: test__blsi_u64 +// X64: sub i64 0, %{{.*}} +// X64: and i64 %{{.*}}, %{{.*}} return __blsi_u64(__X); } unsigned long long 
test__blsmsk_u64(unsigned long long __X) { -// CHECK-LABEL: test__blsmsk_u64 -// CHECK: sub i64 %{{.*}}, 1 -// CHECK: xor i64 %{{.*}}, %{{.*}} +// X64-LABEL: test__blsmsk_u64 +// X64: sub i64 %{{.*}}, 1 +// X64: xor i64 %{{.*}}, %{{.*}} return __blsmsk_u64(__X); } unsigned long long test__blsr_u64(unsigned long long __X) { -// CHECK-LABEL: test__blsr_u64 -// CHECK: sub i64 %{{.*}}, 1 -// CHECK: and i64 %{{.*}}, %{{.*}} +// X64-LABEL: test__blsr_u64 +// X64: sub i64 %{{.*}}, 1 +// X64: and i64 %{{.*}}, %{{.*}} return __blsr_u64(__X); } #endif @@ -186,49 +195,49 @@ unsigned int test_blsr_u32(unsigned int __X) { #ifdef __x86_64__ unsigned long long test_andn_u64(unsigned long __X, unsigned long __Y) { -// CHECK-LABEL: test_andn_u64 -// CHECK: xor i64 %{{.*}}, -1 -// CHECK: and i64 %{{.*}}, %{{.*}} +// X64-LABEL: test_andn_u64 +// X64: xor i64 %{{.*}}, -1 +// X64: and i64 %{{.*}}, %{{.*}} return _andn_u64(__X, __Y); } unsigned long long test_bextr_u64(unsigned long __X, unsigned int __Y, unsigned int __Z) { -// CHECK-LABEL: test_bextr_u64 -// CHECK: and i32 %{{.*}}, 255 -// CHECK: and i32 %{{.*}}, 255 -// CHECK: shl i32 %{{.*}}, 8 -// CHECK: or i32 %{{.*}}, %{{.*}} -// CHECK: zext i32 %{{.*}} to i64 -// CHECK: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}}) +// X64-LABEL: test_bextr_u64 +// X64: and i32 %{{.*}}, 255 +// X64: and i32 %{{.*}}, 255 +// X64: shl i32 %{{.*}}, 8 +// X64: or i32 %{{.*}}, %{{.*}} +// X64: zext i32 %{{.*}} to i64 +// X64: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}}) return _bextr_u64(__X, __Y, __Z); } unsigned long long test_bextr2_u64(unsigned long long __X, unsigned long long __Y) { -// CHECK-LABEL: test_bextr2_u64 -// CHECK: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}}) +// X64-LABEL: test_bextr2_u64 +// X64: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}}) return _bextr2_u64(__X, __Y); } unsigned long long test_blsi_u64(unsigned long long __X) { -// CHECK-LABEL: test_blsi_u64 -// CHECK: sub i64 0, %{{.*}} -// CHECK: and i64 %{{.*}}, %{{.*}} +// X64-LABEL: test_blsi_u64 +// X64: sub i64 0, %{{.*}} +// X64: and i64 %{{.*}}, %{{.*}} return _blsi_u64(__X); } unsigned long long test_blsmsk_u64(unsigned long long __X) { -// CHECK-LABEL: test_blsmsk_u64 -// CHECK: sub i64 %{{.*}}, 1 -// CHECK: xor i64 %{{.*}}, %{{.*}} +// X64-LABEL: test_blsmsk_u64 +// X64: sub i64 %{{.*}}, 1 +// X64: xor i64 %{{.*}}, %{{.*}} return _blsmsk_u64(__X); } unsigned long long test_blsr_u64(unsigned long long __X) { -// CHECK-LABEL: test_blsr_u64 -// CHECK: sub i64 %{{.*}}, 1 -// CHECK: and i64 %{{.*}}, %{{.*}} +// X64-LABEL: test_blsr_u64 +// X64: sub i64 %{{.*}}, 1 +// X64: and i64 %{{.*}}, %{{.*}} return _blsr_u64(__X); } #endif diff --git a/clang/test/CodeGen/X86/bmi2-builtins.c b/clang/test/CodeGen/X86/bmi2-builtins.c index 48424f5..1b2cb90 100644 --- a/clang/test/CodeGen/X86/bmi2-builtins.c +++ b/clang/test/CodeGen/X86/bmi2-builtins.c @@ -3,6 +3,11 @@ // RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi2 -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi2 -emit-llvm -o - | FileCheck %s --check-prefix=B32 +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi2 -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi2 -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefix=B32 +// RUN: 
%clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi2 -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi2 -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefix=B32 + #include <immintrin.h> diff --git a/clang/test/CodeGen/X86/tbm-builtins.c b/clang/test/CodeGen/X86/tbm-builtins.c index d916627..89746bf 100644 --- a/clang/test/CodeGen/X86/tbm-builtins.c +++ b/clang/test/CodeGen/X86/tbm-builtins.c @@ -1,5 +1,12 @@ -// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK + +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-unknown-unknown -target-feature +tbm -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-unknown-unknown -target-feature +tbm -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK #include <x86intrin.h> @@ -13,14 +20,14 @@ unsigned int test__bextri_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__bextri_u64(unsigned long long a) { - // CHECK-LABEL: test__bextri_u64 - // CHECK: call i64 @llvm.x86.tbm.bextri.u64(i64 %{{.*}}, i64 2) + // X64-LABEL: test__bextri_u64 + // X64: call i64 @llvm.x86.tbm.bextri.u64(i64 %{{.*}}, i64 2) return __bextri_u64(a, 2); } unsigned long long test__bextri_u64_bigint(unsigned long long a) { - // CHECK-LABEL: test__bextri_u64_bigint - // CHECK: call i64 @llvm.x86.tbm.bextri.u64(i64 %{{.*}}, i64 549755813887) + // X64-LABEL: test__bextri_u64_bigint + // X64: call i64 @llvm.x86.tbm.bextri.u64(i64 %{{.*}}, i64 549755813887) return __bextri_u64(a, 0x7fffffffffLL); } #endif @@ -34,9 +41,9 @@ unsigned int test__blcfill_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__blcfill_u64(unsigned long long a) { - // CHECK-LABEL: test__blcfill_u64 - // CHECK: [[TMP:%.*]] = add i64 %{{.*}}, 1 - // CHECK: %{{.*}} = and i64 %{{.*}}, [[TMP]] + // X64-LABEL: test__blcfill_u64 + // X64: [[TMP:%.*]] = add i64 %{{.*}}, 1 + // X64: %{{.*}} = and i64 %{{.*}}, [[TMP]] return __blcfill_u64(a); } #endif @@ -51,10 +58,10 @@ unsigned int test__blci_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__blci_u64(unsigned long long a) { - // 
CHECK-LABEL: test__blci_u64 - // CHECK: [[TMP1:%.*]] = add i64 %{{.*}}, 1 - // CHECK: [[TMP2:%.*]] = xor i64 [[TMP1]], -1 - // CHECK: %{{.*}} = or i64 %{{.*}}, [[TMP2]] + // X64-LABEL: test__blci_u64 + // X64: [[TMP1:%.*]] = add i64 %{{.*}}, 1 + // X64: [[TMP2:%.*]] = xor i64 [[TMP1]], -1 + // X64: %{{.*}} = or i64 %{{.*}}, [[TMP2]] return __blci_u64(a); } #endif @@ -69,10 +76,10 @@ unsigned int test__blcic_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__blcic_u64(unsigned long long a) { - // CHECK-LABEL: test__blcic_u64 - // CHECK: [[TMP1:%.*]] = xor i64 %{{.*}}, -1 - // CHECK: [[TMP2:%.*]] = add i64 %{{.*}}, 1 - // CHECK-NEXT: {{.*}} = and i64 [[TMP1]], [[TMP2]] + // X64-LABEL: test__blcic_u64 + // X64: [[TMP1:%.*]] = xor i64 %{{.*}}, -1 + // X64: [[TMP2:%.*]] = add i64 %{{.*}}, 1 + // X64-NEXT: {{.*}} = and i64 [[TMP1]], [[TMP2]] return __blcic_u64(a); } #endif @@ -86,9 +93,9 @@ unsigned int test__blcmsk_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__blcmsk_u64(unsigned long long a) { - // CHECK-LABEL: test__blcmsk_u64 - // CHECK: [[TMP:%.*]] = add i64 %{{.*}}, 1 - // CHECK-NEXT: {{.*}} = xor i64 %{{.*}}, [[TMP]] + // X64-LABEL: test__blcmsk_u64 + // X64: [[TMP:%.*]] = add i64 %{{.*}}, 1 + // X64-NEXT: {{.*}} = xor i64 %{{.*}}, [[TMP]] return __blcmsk_u64(a); } #endif @@ -102,9 +109,9 @@ unsigned int test__blcs_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__blcs_u64(unsigned long long a) { - // CHECK-LABEL: test__blcs_u64 - // CHECK: [[TMP:%.*]] = add i64 %{{.*}}, 1 - // CHECK-NEXT: {{.*}} = or i64 %{{.*}}, [[TMP]] + // X64-LABEL: test__blcs_u64 + // X64: [[TMP:%.*]] = add i64 %{{.*}}, 1 + // X64-NEXT: {{.*}} = or i64 %{{.*}}, [[TMP]] return __blcs_u64(a); } #endif @@ -118,9 +125,9 @@ unsigned int test__blsfill_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__blsfill_u64(unsigned long long a) { - // CHECK-LABEL: test__blsfill_u64 - // CHECK: [[TMP:%.*]] = sub i64 %{{.*}}, 1 - // CHECK-NEXT: {{.*}} = or i64 %{{.*}}, [[TMP]] + // X64-LABEL: test__blsfill_u64 + // X64: [[TMP:%.*]] = sub i64 %{{.*}}, 1 + // X64-NEXT: {{.*}} = or i64 %{{.*}}, [[TMP]] return __blsfill_u64(a); } #endif @@ -135,10 +142,10 @@ unsigned int test__blsic_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__blsic_u64(unsigned long long a) { - // CHECK-LABEL: test__blsic_u64 - // CHECK: [[TMP1:%.*]] = xor i64 %{{.*}}, -1 - // CHECK: [[TMP2:%.*]] = sub i64 %{{.*}}, 1 - // CHECK-NEXT: {{.*}} = or i64 [[TMP1]], [[TMP2]] + // X64-LABEL: test__blsic_u64 + // X64: [[TMP1:%.*]] = xor i64 %{{.*}}, -1 + // X64: [[TMP2:%.*]] = sub i64 %{{.*}}, 1 + // X64-NEXT: {{.*}} = or i64 [[TMP1]], [[TMP2]] return __blsic_u64(a); } #endif @@ -153,10 +160,10 @@ unsigned int test__t1mskc_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__t1mskc_u64(unsigned long long a) { - // CHECK-LABEL: test__t1mskc_u64 - // CHECK: [[TMP1:%.*]] = xor i64 %{{.*}}, -1 - // CHECK: [[TMP2:%.*]] = add i64 %{{.*}}, 1 - // CHECK-NEXT: {{.*}} = or i64 [[TMP1]], [[TMP2]] + // X64-LABEL: test__t1mskc_u64 + // X64: [[TMP1:%.*]] = xor i64 %{{.*}}, -1 + // X64: [[TMP2:%.*]] = add i64 %{{.*}}, 1 + // X64-NEXT: {{.*}} = or i64 [[TMP1]], [[TMP2]] return __t1mskc_u64(a); } #endif @@ -171,10 +178,10 @@ unsigned int test__tzmsk_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__tzmsk_u64(unsigned long long a) { - // CHECK-LABEL: test__tzmsk_u64 - // CHECK: [[TMP1:%.*]] = xor i64 %{{.*}}, -1 - // CHECK: [[TMP2:%.*]] = sub i64 %{{.*}}, 1 - // CHECK-NEXT: {{.*}} = and 
i64 [[TMP1]], [[TMP2]] + // X64-LABEL: test__tzmsk_u64 + // X64: [[TMP1:%.*]] = xor i64 %{{.*}}, -1 + // X64: [[TMP2:%.*]] = sub i64 %{{.*}}, 1 + // X64-NEXT: {{.*}} = and i64 [[TMP1]], [[TMP2]] return __tzmsk_u64(a); } #endif diff --git a/clang/test/CodeGen/amdgpu-image-rsrc-type-debug-info.c b/clang/test/CodeGen/amdgpu-image-rsrc-type-debug-info.c new file mode 100644 index 0000000..ef68c79 --- /dev/null +++ b/clang/test/CodeGen/amdgpu-image-rsrc-type-debug-info.c @@ -0,0 +1,17 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn -emit-llvm -o - %s -debug-info-kind=limited | FileCheck %s + +// CHECK-LABEL: define dso_local void @test_locals( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] !dbg [[DBG6:![0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[IMG:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[IMG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IMG]] to ptr +// CHECK-NEXT: #dbg_declare(ptr addrspace(5) [[IMG]], [[META11:![0-9]+]], !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), [[META14:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[IMG_ASCAST]], align 32, !dbg [[DBG15:![0-9]+]] +// CHECK-NEXT: ret void, !dbg [[DBG16:![0-9]+]] +// +void test_locals(void) { + __amdgpu_texture_t img; + (void)img; +} diff --git a/clang/test/CodeGenCXX/amdgpu-image-rsrc-typeinfo.cpp b/clang/test/CodeGenCXX/amdgpu-image-rsrc-typeinfo.cpp new file mode 100644 index 0000000..0dbd517 --- /dev/null +++ b/clang/test/CodeGenCXX/amdgpu-image-rsrc-typeinfo.cpp @@ -0,0 +1,7 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn %s -emit-llvm -o - | FileCheck %s +namespace std { class type_info; } +auto &a = typeid(__amdgpu_texture_t); +//// NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +// CHECK: {{.*}} diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp index 5920ced..137a49b 100644 --- a/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp +++ b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp @@ -1,7 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: amdgpu-registered-target +// REQUIRES: spirv-registered-target // RUN: %clang_cc1 %s -x hip -fcuda-is-device -emit-llvm -O0 -o - \ -// RUN: -triple=amdgcn-amd-amdhsa | FileCheck %s +// RUN: -triple=amdgcn-amd-amdhsa | FileCheck --check-prefix=GCN %s +// RUN: %clang_cc1 %s -x hip -fcuda-is-device -emit-llvm -O0 -o - \ +// RUN: -triple=spirv64-amd-amdhsa | FileCheck --check-prefix=AMDGCNSPIRV %s // CHECK-LABEL: @_Z29test_non_volatile_parameter32Pj( // CHECK-NEXT: entry: @@ -21,6 +24,43 @@ // CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z29test_non_volatile_parameter32Pj( +// GCN-NEXT: entry: +// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// GCN-NEXT: [[RES:%.*]] = alloca i32, align 4, addrspace(5) +// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr +// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr +// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4:![0-9]+]] +// GCN-NEXT: store i32 [[TMP3]], ptr [[RES_ASCAST]], align 4 +// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z29test_non_volatile_parameter32Pj( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5:![0-9]+]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr 
addrspace(4) [[RES_ASCAST]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[TMP5]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_non_volatile_parameter32(__UINT32_TYPE__ *ptr) { __UINT32_TYPE__ res; @@ -47,6 +87,43 @@ __attribute__((device)) void test_non_volatile_parameter32(__UINT32_TYPE__ *ptr) // CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z29test_non_volatile_parameter64Py( +// GCN-NEXT: entry: +// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// GCN-NEXT: [[RES:%.*]] = alloca i64, align 8, addrspace(5) +// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr +// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr +// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr [[RES_ASCAST]], align 8 +// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z29test_non_volatile_parameter64Py( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i64, align 8 +// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) [[TMP1]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) 
[[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i64, ptr addrspace(4) [[TMP5]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_non_volatile_parameter64(__UINT64_TYPE__ *ptr) { __UINT64_TYPE__ res; @@ -73,6 +150,43 @@ __attribute__((device)) void test_non_volatile_parameter64(__UINT64_TYPE__ *ptr) // CHECK-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z25test_volatile_parameter32PVj( +// GCN-NEXT: entry: +// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// GCN-NEXT: [[RES:%.*]] = alloca i32, align 4, addrspace(5) +// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr +// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr +// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP2:%.*]] = load volatile i32, ptr [[TMP1]], align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP3]], ptr [[RES_ASCAST]], align 4 +// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP6:%.*]] = load volatile i32, ptr [[TMP5]], align 4 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z25test_volatile_parameter32PVj( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load volatile i32, ptr addrspace(4) [[TMP1]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr addrspace(4) [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = 
load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load volatile i32, ptr addrspace(4) [[TMP5]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr addrspace(4) [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_volatile_parameter32(volatile __UINT32_TYPE__ *ptr) { __UINT32_TYPE__ res; @@ -99,6 +213,43 @@ __attribute__((device)) void test_volatile_parameter32(volatile __UINT32_TYPE__ // CHECK-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z25test_volatile_parameter64PVy( +// GCN-NEXT: entry: +// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// GCN-NEXT: [[RES:%.*]] = alloca i64, align 8, addrspace(5) +// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr +// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr +// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP2:%.*]] = load volatile i64, ptr [[TMP1]], align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr [[RES_ASCAST]], align 8 +// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP6:%.*]] = load volatile i64, ptr [[TMP5]], align 8 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z25test_volatile_parameter64PVy( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i64, align 8 +// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load volatile i64, ptr addrspace(4) [[TMP1]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr addrspace(4) [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) 
[[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load volatile i64, ptr addrspace(4) [[TMP5]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr addrspace(4) [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_volatile_parameter64(volatile __UINT64_TYPE__ *ptr) { __UINT64_TYPE__ res; @@ -116,6 +267,25 @@ __attribute__((device)) void test_volatile_parameter64(volatile __UINT64_TYPE__ // CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z13test_shared32v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z13test_shared32v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_shared32() { __attribute__((shared)) __UINT32_TYPE__ val; @@ -134,6 +304,25 @@ __attribute__((device)) void test_shared32() { // CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // 
CHECK-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z13test_shared64v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z13test_shared64v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_shared64() { __attribute__((shared)) __UINT64_TYPE__ val; @@ -153,6 +342,25 @@ __attribute__((device)) __UINT32_TYPE__ global_val32; // CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z13test_global32v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4 +// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, 
!amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z13test_global32v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_global32() { global_val32 = __builtin_amdgcn_atomic_inc32(&global_val32, global_val32, __ATOMIC_SEQ_CST, "workgroup"); @@ -170,6 +378,25 @@ __attribute__((device)) __UINT64_TYPE__ global_val64; // CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z13test_global64v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z13test_global64v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr 
addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_global64() { global_val64 = __builtin_amdgcn_atomic_inc64(&global_val64, global_val64, __ATOMIC_SEQ_CST, "workgroup"); @@ -189,6 +416,29 @@ __attribute__((constant)) __UINT32_TYPE__ cval32; // CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z15test_constant32v( +// GCN-NEXT: entry: +// GCN-NEXT: [[LOCAL_VAL:%.*]] = alloca i32, align 4, addrspace(5) +// GCN-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LOCAL_VAL]] to ptr +// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), align 4 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP1]], ptr [[LOCAL_VAL_ASCAST]], align 4 +// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z15test_constant32v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[LOCAL_VAL:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr [[LOCAL_VAL]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_constant32() { __UINT32_TYPE__ local_val; @@ -210,6 +460,29 @@ __attribute__((constant)) __UINT64_TYPE__ cval64; // CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i64 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z15test_constant64v( +// GCN-NEXT: entry: +// GCN-NEXT: [[LOCAL_VAL:%.*]] = alloca 
i64, align 8, addrspace(5) +// GCN-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LOCAL_VAL]] to ptr +// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), align 8 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP1]], ptr [[LOCAL_VAL_ASCAST]], align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 8 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z15test_constant64v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[LOCAL_VAL:%.*]] = alloca i64, align 8 +// AMDGCNSPIRV-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr [[LOCAL_VAL]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_constant64() { __UINT64_TYPE__ local_val; @@ -240,6 +513,49 @@ __attribute__((device)) void test_constant64() { // CHECK-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP10]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z12test_order32v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP0]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP4:%.*]] = load i32, ptr 
addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP4]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP6:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP6]] syncscope("workgroup") release, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP8:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP8]] syncscope("workgroup") acq_rel, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP9]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP10:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP10]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z12test_order32v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP4]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr 
addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP6]] syncscope("workgroup") release, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP8]] syncscope("workgroup") acq_rel, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP9]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP10]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP11]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_order32() { __attribute__((shared)) __UINT32_TYPE__ val; @@ -278,6 +594,49 @@ __attribute__((device)) void test_order32() { // CHECK-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP10]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i64 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z12test_order64v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP0]] syncscope("workgroup") monotonic, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP4:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP4]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP6:%.*]] = load i64, ptr addrspacecast (ptr 
addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP6]] syncscope("workgroup") release, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP8:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP8]] syncscope("workgroup") acq_rel, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP9]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP10:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP10]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z12test_order64v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") monotonic, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP4]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP6]] syncscope("workgroup") release, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 
+// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP8]] syncscope("workgroup") acq_rel, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP9]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP10]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP11]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_order64() { __attribute__((shared)) __UINT64_TYPE__ val; @@ -310,6 +669,37 @@ __attribute__((device)) void test_order64() { // CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP6]] syncscope("wavefront") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z12test_scope32v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP0]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP4:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP4]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP6:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP6]] syncscope("wavefront") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z12test_scope32v( +// AMDGCNSPIRV-NEXT: entry: +// 
AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP0]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP4]] syncscope("device") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP6]] syncscope("subgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_scope32() { __attribute__((shared)) __UINT32_TYPE__ val; @@ -338,6 +728,37 @@ __attribute__((device)) void test_scope32() { // CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP6]] syncscope("wavefront") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i64 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z12test_scope64v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP0]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast 
(ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP4:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP4]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP6:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP6]] syncscope("wavefront") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z12test_scope64v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP0]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP4]] syncscope("device") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP6]] syncscope("subgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_scope64() { __attribute__((shared)) __UINT64_TYPE__ val; diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp index 1e977dd..dd1ca45 100644 --- a/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp +++ 
b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp @@ -1,7 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 // REQUIRES: amdgpu-registered-target +// REQUIRES: spirv-registered-target // RUN: %clang_cc1 %s -emit-llvm -O0 -o - \ -// RUN: -triple=amdgcn-amd-amdhsa | FileCheck %s +// RUN: -triple=amdgcn-amd-amdhsa | FileCheck --check-prefix=GCN %s +// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \ +// RUN: -triple=spirv64-amd-amdhsa | FileCheck --check-prefix=AMDGCNSPIRV %s // CHECK-LABEL: define dso_local void @_Z25test_memory_fence_successv( // CHECK-SAME: ) #[[ATTR0:[0-9]+]] { @@ -12,6 +15,25 @@ // CHECK-NEXT: fence syncscope("agent") acq_rel // CHECK-NEXT: fence syncscope("workgroup") release // CHECK-NEXT: ret void +// GCN-LABEL: define dso_local void @_Z25test_memory_fence_successv( +// GCN-SAME: ) #[[ATTR0:[0-9]+]] { +// GCN-NEXT: entry: +// GCN-NEXT: fence syncscope("workgroup") seq_cst +// GCN-NEXT: fence syncscope("agent") acquire +// GCN-NEXT: fence seq_cst +// GCN-NEXT: fence syncscope("agent") acq_rel +// GCN-NEXT: fence syncscope("workgroup") release +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: define spir_func void @_Z25test_memory_fence_successv( +// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0:[0-9]+]] { +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst +// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire +// AMDGCNSPIRV-NEXT: fence seq_cst +// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release +// AMDGCNSPIRV-NEXT: ret void // void test_memory_fence_success() { @@ -35,6 +57,25 @@ void test_memory_fence_success() { // CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]] // CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META3]] // CHECK-NEXT: ret void +// GCN-LABEL: define dso_local void @_Z10test_localv( +// GCN-SAME: ) #[[ATTR0]] { +// GCN-NEXT: entry: +// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3:![0-9]+]] +// GCN-NEXT: fence syncscope("agent") acquire, !mmra [[META3]] +// GCN-NEXT: fence seq_cst, !mmra [[META3]] +// GCN-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]] +// GCN-NEXT: fence syncscope("workgroup") release, !mmra [[META3]] +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: define spir_func void @_Z10test_localv( +// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3:![0-9]+]] +// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence seq_cst, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: ret void // void test_local() { __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local"); @@ -58,6 +99,25 @@ void test_local() { // CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META4]] // CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META4]] // CHECK-NEXT: ret void +// GCN-LABEL: define dso_local void @_Z11test_globalv( +// GCN-SAME: ) #[[ATTR0]] { +// GCN-NEXT: entry: +// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META4:![0-9]+]] +// GCN-NEXT: fence syncscope("agent") acquire, !mmra [[META4]] +// GCN-NEXT: fence seq_cst, !mmra [[META4]] +// GCN-NEXT: fence syncscope("agent") acq_rel, !mmra [[META4]] +// GCN-NEXT: fence syncscope("workgroup") release, !mmra 
[[META4]] +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: define spir_func void @_Z11test_globalv( +// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META4:![0-9]+]] +// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire, !mmra [[META4]] +// AMDGCNSPIRV-NEXT: fence seq_cst, !mmra [[META4]] +// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel, !mmra [[META4]] +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release, !mmra [[META4]] +// AMDGCNSPIRV-NEXT: ret void // void test_global() { __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "global"); @@ -80,6 +140,25 @@ void test_global() { // CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]] // CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META3]] // CHECK-NEXT: ret void +// GCN-LABEL: define dso_local void @_Z10test_imagev( +// GCN-SAME: ) #[[ATTR0]] { +// GCN-NEXT: entry: +// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3]] +// GCN-NEXT: fence syncscope("agent") acquire, !mmra [[META3]] +// GCN-NEXT: fence seq_cst, !mmra [[META3]] +// GCN-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]] +// GCN-NEXT: fence syncscope("workgroup") release, !mmra [[META3]] +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: define spir_func void @_Z10test_imagev( +// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence seq_cst, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: ret void // void test_image() { __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local"); @@ -99,13 +178,33 @@ void test_image() { // CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]] // CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]] // CHECK-NEXT: ret void +// GCN-LABEL: define dso_local void @_Z10test_mixedv( +// GCN-SAME: ) #[[ATTR0]] { +// GCN-NEXT: entry: +// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]] +// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]] +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: define spir_func void @_Z10test_mixedv( +// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]] +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]] +// AMDGCNSPIRV-NEXT: ret void // void test_mixed() { __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local", "global"); __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local", "local", "global", "local", "local"); } -//. // CHECK: [[META3]] = !{!"amdgpu-synchronize-as", !"local"} // CHECK: [[META4]] = !{!"amdgpu-synchronize-as", !"global"} // CHECK: [[META5]] = !{[[META4]], [[META3]]} //. +// GCN: [[META3]] = !{!"amdgpu-synchronize-as", !"local"} +// GCN: [[META4]] = !{!"amdgpu-synchronize-as", !"global"} +// GCN: [[META5]] = !{[[META4]], [[META3]]} +//. +// AMDGCNSPIRV: [[META3]] = !{!"amdgpu-synchronize-as", !"local"} +// AMDGCNSPIRV: [[META4]] = !{!"amdgpu-synchronize-as", !"global"} +// AMDGCNSPIRV: [[META5]] = !{[[META4]], [[META3]]} +//. 
diff --git a/clang/test/CodeGenCXX/gh56652.cpp b/clang/test/CodeGenCXX/gh56652.cpp new file mode 100644 index 0000000..06a496e --- /dev/null +++ b/clang/test/CodeGenCXX/gh56652.cpp @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-elf-gnu %s -emit-llvm -o - | FileCheck %s + +namespace GH56652{ + +struct foo {}; + +template <typename T> struct bar { + using type = T; + + template <foo> inline static constexpr auto b = true; +}; + +template <typename T> +concept C = requires(T a) { T::template b<foo{}>; }; + +template <typename T> auto fn(T) { + if constexpr (!C<T>) + return foo{}; + else + return T{}; +} + +auto a = decltype(fn(bar<int>{})){}; + +} + +namespace GH116319 { + +template <int = 0> struct a { +template <class> static constexpr auto b = 2; +template <class> static void c() noexcept(noexcept(b<int>)) {} +}; + +void test() { a<>::c<int>(); } + + +} + +// CHECK: %"struct.GH56652::bar" = type { i8 } +// CHECK: $_ZN8GH1163191aILi0EE1cIiEEvv = comdat any +// CHECK: @_ZN7GH566521aE = global %"struct.GH56652::bar" undef diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl index 19ab656..7cd3f14 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl @@ -1,13 +1,13 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1101 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1102 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1103 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1150 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1151 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1153 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1101 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1102 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1103 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1150 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1151 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1153 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,AMDGCNSPIRV %s typedef unsigned int uint; typedef unsigned long ulong; @@ -50,7 +50,8 @@ void 
test_s_wait_event_export_ready() { } // CHECK-LABEL: @test_global_add_f32 -// CHECK: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}} +// GCN: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}} +// AMDGCNSPIRV: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("device") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}} #if !defined(__SPIRV__) void test_global_add_f32(float *rtn, global float *addr, float x) { #else diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl index 5f202ba..6bb20bf 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl @@ -1,9 +1,9 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,AMDGCNSPIRV %s #pragma OPENCL EXTENSION cl_khr_fp16 : enable @@ -252,9 +252,11 @@ void test_update_dpp_const_int(global int* out, int arg1) // CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src seq_cst, align 4{{$}} // CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src seq_cst, align 4{{$}} -// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}} +// GCN: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}} +// AMDGCNSPIRV: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("device") monotonic, align 4{{$}} // CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}} -// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}} +// GCN: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}} +// AMDGCNSPIRV: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("subgroup") monotonic, align 4{{$}} // CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}} // CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}} #if !defined(__SPIRV__) @@ -293,9 +295,11 @@ void test_ds_faddf(local float *out, float src) { // CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src 
seq_cst, align 4{{$}} // CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src seq_cst, align 4{{$}} -// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}} +// GCN: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}} +// AMDGCNSPIRV: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("device") monotonic, align 4{{$}} // CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}} -// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}} +// GCN: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}} +// AMDGCNSPIRV: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("subgroup") monotonic, align 4{{$}} // CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}} // CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src monotonic, align 4{{$}} @@ -334,9 +338,11 @@ void test_ds_fminf(__attribute__((address_space(3))) float *out, float src) { // CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}} // CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}} -// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}} +// GCN: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}} +// AMDGCNSPIRV: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("device") monotonic, align 4{{$}} // CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}} -// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}} +// GCN: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}} +// AMDGCNSPIRV: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("subgroup") monotonic, align 4{{$}} // CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}} // CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src monotonic, align 4{{$}} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 039d032..ab0b0b9 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -1231,7 +1231,8 @@ void test_atomic_inc_dec(__attribute__((address_space(3))) uint *lptr, __attribu // CHECK: atomicrmw udec_wrap ptr addrspace(3) %lptr, i32 %val syncscope("workgroup") seq_cst, align 4 res = __builtin_amdgcn_atomic_dec32(lptr, val, __ATOMIC_SEQ_CST, "workgroup"); - // CHECK: atomicrmw uinc_wrap ptr addrspace(1) %gptr, i32 %val syncscope("agent") seq_cst, align 4 + // CHECK-AMDGCN: atomicrmw uinc_wrap ptr addrspace(1) %gptr, i32 %val syncscope("agent") seq_cst, align 4 + // CHECK-SPIRV: atomicrmw uinc_wrap ptr addrspace(1) %gptr, i32 %val syncscope("device") seq_cst, align 4 res = __builtin_amdgcn_atomic_inc32(gptr, val, __ATOMIC_SEQ_CST, "agent"); // CHECK: atomicrmw udec_wrap ptr addrspace(1) %gptr, i32 %val seq_cst, align 4 diff --git a/clang/test/Driver/modules-print-library-module-manifest-path.cpp b/clang/test/Driver/modules-print-library-module-manifest-path.cpp index 7606713..af0f124 100644 --- a/clang/test/Driver/modules-print-library-module-manifest-path.cpp +++ b/clang/test/Driver/modules-print-library-module-manifest-path.cpp @@ -18,6 
+18,14 @@ // RUN: --target=x86_64-linux-gnu 2>&1 \ // RUN: | FileCheck libcxx.cpp +// check that -nostdlib causes no library-provided module manifest to +// be reported, even when libc++.modules.json is present. +// RUN: %clang -print-library-module-manifest-path \ +// RUN: -nostdlib \ +// RUN: -resource-dir=%t/Inputs/usr/lib/x86_64-linux-gnu \ +// RUN: --target=x86_64-linux-gnu 2>&1 \ +// RUN: | FileCheck libcxx-no-module-json.cpp + // for macos there is a different directory structure // where the library and libc++.modules.json file are in lib // directly but headers are in clang/ver directory which diff --git a/clang/test/Lexer/cxx-features.cpp b/clang/test/Lexer/cxx-features.cpp index ced5bca..8eb9ea0 100644 --- a/clang/test/Lexer/cxx-features.cpp +++ b/clang/test/Lexer/cxx-features.cpp @@ -148,7 +148,7 @@ // init_captures checked below -#if check(modules, 0, 0, 0, 0, 0, 0, 0) +#if check(modules, 0, 0, 0, 0, 1, 1, 1) // FIXME: 201907 in C++20 #error "wrong value for __cpp_modules" #endif diff --git a/clang/test/OpenMP/for_reduction_codegen.cpp b/clang/test/OpenMP/for_reduction_codegen.cpp index 83632db..cb4bcc9 100644 --- a/clang/test/OpenMP/for_reduction_codegen.cpp +++ b/clang/test/OpenMP/for_reduction_codegen.cpp @@ -27,7 +27,6 @@ struct S { ~S() {} }; - template <typename T, int length> T tmain() { T t; @@ -60,6 +59,15 @@ T tmain() { } extern S<float> **foo(); +int g_arr[10]; + +void reductionArrayElement() { +#pragma omp parallel +#pragma omp for reduction(+:g_arr[1]) + for (int i = 0; i < 10; i++) { + g_arr[1] += i; + } +} int main() { #ifdef LAMBDA @@ -164,6 +172,7 @@ int main() { #pragma omp for reduction(& : var3) for (int i = 0; i < 10; ++i) ; + reductionArrayElement(); return tmain<int, 42>(); #endif } @@ -535,6 +544,26 @@ int main() { //. // CHECK4: @.gomp_critical_user_.reduction.var = common global [8 x i32] zeroinitializer, align 8 //. + +// CHECK1-LABEL: define {{.*}}reductionArrayElement{{.*}}.omp_outlined{{.*}} +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1: [[G_ARR:%.*]] = alloca i32, align 4 +// CHECK1: [[TMP0:%.*]] = sdiv exact i64 sub (i64 ptrtoint (ptr @g_arr to i64){{.*}} +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[G_ARR:%.*]], i64 [[TMP0]] +// CHECK1: omp.inner.for.body: +// CHECK1: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP1]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]],{{.+}} +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4 +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void {{.*}}__kmpc_for_static_fini{{.+}} +// CHECK1: {{.*}}call i32 {{.*}}__kmpc_reduce{{.+}} +// CHECK1: omp.reduction.default: +// CHECK1-NEXT: call void @__kmpc_barrier{{.+}} +// CHECK1-NEXT: ret void +// + // CHECK1-LABEL: define {{[^@]+}}@main // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: @@ -614,6 +643,7 @@ int main() { // CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @main.omp_outlined.11, ptr [[TMP7]]) // CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[VAR3]], align 8 // CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @main.omp_outlined.12, ptr [[TMP8]]) +// CHECK1-NEXT: call void {{.*}}reductionArrayElement{{.*}} // CHECK1-NEXT: [[CALL10:%.*]] = call noundef i32 @_Z5tmainIiLi42EET_v() // CHECK1-NEXT: store i32 [[CALL10]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 diff --git a/clang/test/OpenMP/fuse_ast_print.cpp b/clang/test/OpenMP/fuse_ast_print.cpp new file mode 100644 index 0000000..283f588 --- /dev/null +++ b/clang/test/OpenMP/fuse_ast_print.cpp @@ -0,0 +1,397 @@ +// Check no warnings/errors +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -fsyntax-only -verify %s +// expected-no-diagnostics + +// Check AST and unparsing +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -ast-dump %s | FileCheck %s --check-prefix=DUMP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -ast-print %s | FileCheck %s --check-prefix=PRINT + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -emit-pch -o %t %s +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -include-pch %t -ast-dump-all %s | FileCheck %s --check-prefix=DUMP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -include-pch %t -ast-print %s | FileCheck %s --check-prefix=PRINT + +#ifndef HEADER +#define HEADER + +// placeholder for loop body code +extern "C" void body(...); + +// PRINT-LABEL: void foo1( +// DUMP-LABEL: FunctionDecl {{.*}} foo1 +void foo1() { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + + } + +} + +// PRINT-LABEL: void foo2( +// DUMP-LABEL: FunctionDecl {{.*}} foo2 +void foo2() { + // PRINT: #pragma omp unroll partial(4) + // DUMP: OMPUnrollDirective + // DUMP-NEXT: OMPPartialClause + // DUMP-NEXT: ConstantExpr + // DUMP-NEXT: value: Int 4 + // DUMP-NEXT: IntegerLiteral {{.*}} 4 + #pragma omp unroll partial(4) + // PRINT: #pragma omp fuse + // DUMP-NEXT: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + } + +} + +//PRINT-LABEL: void foo3( +//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo3 +template<int Factor1, int Factor2> +void foo3() { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: #pragma omp unroll partial(Factor1) + // DUMP: OMPUnrollDirective + #pragma omp unroll partial(Factor1) + // PRINT: for (int i = 0; i < 12; i += 1) + // DUMP: ForStmt + for (int i = 0; i < 12; i += 1) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: #pragma omp unroll partial(Factor2) + 
// DUMP: OMPUnrollDirective + #pragma omp unroll partial(Factor2) + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + + } +} + +// Also test instantiating the template. +void tfoo3() { + foo3<4,2>(); +} + +//PRINT-LABEL: void foo4( +//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo4 +template<typename T, T Step> +void foo4(int start, int end) { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (T i = start; i < end; i += Step) + // DUMP: ForStmt + for (T i = start; i < end; i += Step) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + + // PRINT: for (T j = end; j > start; j -= Step) + // DUMP: ForStmt + for (T j = end; j > start; j -= Step) { + // PRINT: body(j) + // DUMP: CallExpr + body(j); + } + + } +} + +// Also test instantiating the template. +void tfoo4() { + foo4<int, 4>(0, 64); +} + + + +// PRINT-LABEL: void foo5( +// DUMP-LABEL: FunctionDecl {{.*}} foo5 +void foo5() { + double arr[128], arr2[128]; + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT-NEXT: for (auto &&a : arr) + // DUMP-NEXT: CXXForRangeStmt + for (auto &&a: arr) + // PRINT: body(a) + // DUMP: CallExpr + body(a); + // PRINT: for (double v = 42; auto &&b : arr) + // DUMP: CXXForRangeStmt + for (double v = 42; auto &&b: arr) + // PRINT: body(b, v); + // DUMP: CallExpr + body(b, v); + // PRINT: for (auto &&c : arr2) + // DUMP: CXXForRangeStmt + for (auto &&c: arr2) + // PRINT: body(c) + // DUMP: CallExpr + body(c); + + } + +} + +// PRINT-LABEL: void foo6( +// DUMP-LABEL: FunctionDecl {{.*}} foo6 +void foo6() { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i <= 10; ++i) + // DUMP: ForStmt + for (int i = 0; i <= 10; ++i) + body(i); + // PRINT: for (int j = 0; j < 100; ++j) + // DUMP: ForStmt + for(int j = 0; j < 100; ++j) + body(j); + } + // PRINT: #pragma omp unroll partial(4) + // DUMP: OMPUnrollDirective + #pragma omp unroll partial(4) + // PRINT: for (int k = 0; k < 250; ++k) + // DUMP: ForStmt + for (int k = 0; k < 250; ++k) + body(k); + } +} + +// PRINT-LABEL: void foo7( +// DUMP-LABEL: FunctionDecl {{.*}} foo7 +void foo7() { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + } + } + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + } + } + } + } + +} + +// PRINT-LABEL: void foo8( +// DUMP-LABEL: FunctionDecl {{.*}} foo8 +void foo8() { + // PRINT: #pragma omp fuse looprange(2,2) + // DUMP: OMPFuseDirective + // DUMP: OMPLooprangeClause + #pragma omp fuse looprange(2,2) + // PRINT: { + // DUMP: 
CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + + } + +} + +//PRINT-LABEL: void foo9( +//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo9 +//DUMP-LABEL: NonTypeTemplateParmDecl {{.*}} F +//DUMP-LABEL: NonTypeTemplateParmDecl {{.*}} C +template<int F, int C> +void foo9() { + // PRINT: #pragma omp fuse looprange(F,C) + // DUMP: OMPFuseDirective + // DUMP: OMPLooprangeClause + #pragma omp fuse looprange(F,C) + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + + } +} + +// Also test instantiating the template. +void tfoo9() { + foo9<1, 2>(); +} + +// PRINT-LABEL: void foo10( +// DUMP-LABEL: FunctionDecl {{.*}} foo10 +void foo10() { + // PRINT: #pragma omp fuse looprange(2,2) + // DUMP: OMPFuseDirective + // DUMP: OMPLooprangeClause + #pragma omp fuse looprange(2,2) + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int ii = 0; ii < 10; ii += 2) + // DUMP: ForStmt + for (int ii = 0; ii < 10; ii += 2) + // PRINT: body(ii) + // DUMP: CallExpr + body(ii); + // PRINT: #pragma omp fuse looprange(2,2) + // DUMP: OMPFuseDirective + // DUMP: OMPLooprangeClause + #pragma omp fuse looprange(2,2) + { + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + // PRINT: for (int jj = 10; jj > 0; --jj) + // DUMP: ForStmt + for (int jj = 10; jj > 0; --jj) + // PRINT: body(jj) + // DUMP: CallExpr + body(jj); + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + // PRINT: for (int kk = 0; kk <= 10; ++kk) + // DUMP: ForStmt + for (int kk = 0; kk <= 10; ++kk) + // PRINT: body(kk) + // DUMP: CallExpr + body(kk); + } + } + +} + +#endif diff --git a/clang/test/OpenMP/fuse_codegen.cpp b/clang/test/OpenMP/fuse_codegen.cpp new file mode 100644 index 0000000..742c280 --- /dev/null +++ b/clang/test/OpenMP/fuse_codegen.cpp @@ -0,0 +1,2328 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 5 +// expected-no-diagnostics + +// Check code generation +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-pch -o %t %s +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -include-pch %t -emit-llvm %s -o - | FileCheck 
%s --check-prefix=CHECK2 + +#ifndef HEADER +#define HEADER + +//placeholder for loop body code. +extern "C" void body(...) {} + +extern "C" void foo1(int start1, int end1, int step1, int start2, int end2, int step2) { + int i,j; + #pragma omp fuse + { + for(i = start1; i < end1; i += step1) body(i); + for(j = start2; j < end2; j += step2) body(j); + } + +} + +template <typename T> +void foo2(T start, T end, T step){ + T i,j,k; + #pragma omp fuse + { + for(i = start; i < end; i += step) body(i); + for(j = end; j > start; j -= step) body(j); + for(k = start+step; k < end+step; k += step) body(k); + } +} + +extern "C" void tfoo2() { + foo2<int>(0, 64, 4); +} + +extern "C" void foo3() { + double arr[256]; + #pragma omp fuse + { + #pragma omp fuse + { + for(int i = 0; i < 128; ++i) body(i); + for(int j = 0; j < 256; j+=2) body(j); + } + for(int c = 42; auto &&v: arr) body(c,v); + for(int cc = 37; auto &&vv: arr) body(cc, vv); + } +} + +extern "C" void foo4() { + double arr[256]; + + #pragma omp fuse looprange(2,2) + { + for(int i = 0; i < 128; ++i) body(i); + for(int j = 0; j < 256; j+=2) body(j); + for(int k = 0; k < 64; ++k) body(k); + for(int c = 42; auto &&v: arr) body(c,v); + } +} + +// This exemplifies the usage of loop transformations that generate +// more than top level canonical loop nests (e.g split, loopranged fuse...) +extern "C" void foo5() { + double arr[256]; + #pragma omp fuse looprange(2,2) + { + #pragma omp fuse looprange(2,2) + { + for(int i = 0; i < 128; ++i) body(i); + for(int j = 0; j < 256; j+=2) body(j); + for(int k = 0; k < 512; ++k) body(k); + } + for(int c = 42; auto &&v: arr) body(c,v); + for(int cc = 37; auto &&vv: arr) body(cc, vv); + } +} + + +#endif +// CHECK1-LABEL: define dso_local void @body( +// CHECK1-SAME: ...) #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @foo1( +// CHECK1-SAME: i32 noundef [[START1:%.*]], i32 noundef [[END1:%.*]], i32 noundef [[STEP1:%.*]], i32 noundef [[START2:%.*]], i32 noundef [[END2:%.*]], i32 noundef [[STEP2:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[START1_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[END1_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[STEP1_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[START2_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[END2_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[STEP2_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// 
CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[START1]], ptr [[START1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[END1]], ptr [[END1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[STEP1]], ptr [[STEP1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[START2]], ptr [[START2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[END2]], ptr [[END2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[STEP2]], ptr [[STEP2_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[START1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[START1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[END1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[END2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]] +// CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 +// CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1 
+// CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], %[[COND_TRUE]] ], [ [[TMP22]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP27]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[MUL19:%.*]] = mul i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP30]], [[MUL19]] +// CHECK1-NEXT: store i32 [[ADD20]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP33]]) +// CHECK1-NEXT: br label %[[IF_END]] +// CHECK1: [[IF_END]]: +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP34]], [[TMP35]] +// CHECK1-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]] +// CHECK1: [[IF_THEN22]]: +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL23:%.*]] = mul i32 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[ADD24:%.*]] = add i32 [[TMP36]], [[MUL23]] +// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[MUL25:%.*]] = mul i32 [[TMP40]], [[TMP41]] +// CHECK1-NEXT: [[ADD26:%.*]] = add i32 [[TMP39]], [[MUL25]] +// CHECK1-NEXT: store i32 [[ADD26]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP42]]) +// CHECK1-NEXT: br label %[[IF_END27]] +// CHECK1: [[IF_END27]]: +// CHECK1-NEXT: br label %[[FOR_INC:.*]] +// CHECK1: [[FOR_INC]]: +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP43]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK1: [[FOR_END]]: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @tfoo2( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: call void @_Z4foo2IiEvT_S0_S0_(i32 noundef 0, i32 noundef 64, i32 noundef 4) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define linkonce_odr void @_Z4foo2IiEvT_S0_S0_( +// CHECK1-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] comdat { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 
4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_17:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP21:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]] +// CHECK1-NEXT: [[SUB14:%.*]] = 
sub i32 [[DIV13]], 1 +// CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1 +// CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: store i32 [[ADD16]], ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK1-NEXT: [[SUB23:%.*]] = sub i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP27]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP28]] +// CHECK1-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1 +// CHECK1-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK1-NEXT: [[ADD28:%.*]] = add i32 [[TMP29]], 1 +// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP30]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP33]], %[[COND_TRUE]] ], [ [[TMP34]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP35]], [[TMP36]] +// CHECK1-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label 
%[[COND_FALSE31:.*]] +// CHECK1: [[COND_TRUE30]]: +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK1-NEXT: br label %[[COND_END32:.*]] +// CHECK1: [[COND_FALSE31]]: +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: br label %[[COND_END32]] +// CHECK1: [[COND_END32]]: +// CHECK1-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP37]], %[[COND_TRUE30]] ], [ [[TMP38]], %[[COND_FALSE31]] ] +// CHECK1-NEXT: store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP41]], [[TMP42]] +// CHECK1-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP44]], [[TMP45]] +// CHECK1-NEXT: [[ADD36:%.*]] = add i32 [[TMP43]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[MUL37:%.*]] = mul i32 [[TMP47]], [[TMP48]] +// CHECK1-NEXT: [[ADD38:%.*]] = add i32 [[TMP46]], [[MUL37]] +// CHECK1-NEXT: store i32 [[ADD38]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP49]]) +// CHECK1-NEXT: br label %[[IF_END]] +// CHECK1: [[IF_END]]: +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP50]], [[TMP51]] +// CHECK1-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]] +// CHECK1: [[IF_THEN40]]: +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL41:%.*]] = mul i32 [[TMP53]], [[TMP54]] +// CHECK1-NEXT: [[ADD42:%.*]] = add i32 [[TMP52]], [[MUL41]] +// CHECK1-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[MUL43:%.*]] = mul i32 [[TMP56]], [[TMP57]] +// CHECK1-NEXT: [[SUB44:%.*]] = sub i32 [[TMP55]], [[MUL43]] +// CHECK1-NEXT: store i32 [[SUB44]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP58]]) +// CHECK1-NEXT: br label %[[IF_END45]] +// CHECK1: [[IF_END45]]: +// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP59]], [[TMP60]] +// CHECK1-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]] +// CHECK1: [[IF_THEN47]]: +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4 +// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4 +// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL48:%.*]] = mul i32 [[TMP62]], [[TMP63]] +// CHECK1-NEXT: [[ADD49:%.*]] = add i32 [[TMP61]], [[MUL48]] +// CHECK1-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4 +// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4 +// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[MUL50:%.*]] = mul i32 [[TMP65]], [[TMP66]] +// CHECK1-NEXT: [[ADD51:%.*]] = add i32 [[TMP64]], [[MUL50]] +// CHECK1-NEXT: store i32 [[ADD51]], ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP67:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP67]]) +// CHECK1-NEXT: br label %[[IF_END52]] +// CHECK1: [[IF_END52]]: +// CHECK1-NEXT: br label %[[FOR_INC:.*]] +// CHECK1: [[FOR_INC]]: +// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP68]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK1: [[FOR_END]]: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @foo3( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: 
[[DOTOMP_LB116:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[CC:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE221:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END222:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN225:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_27:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_30:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_TEMP_140:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX46:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX52:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VV:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 +// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK1-NEXT: store ptr 
[[ARR]], ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1 +// CHECK1-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1 +// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1 +// CHECK1-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8 +// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1 +// CHECK1-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: store i32 37, ptr [[CC]], align 4 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE221]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY23:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP15]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR24:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY23]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR24]], ptr [[__END222]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY26:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP16]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY26]], ptr [[__BEGIN225]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY28]], ptr [[DOTCAPTURE_EXPR_27]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__END222]], align 8 +// CHECK1-NEXT: store ptr [[TMP18]], ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST31:%.*]] = ptrtoint ptr [[TMP19]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST32:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: 
[[SUB_PTR_SUB33:%.*]] = sub i64 [[SUB_PTR_LHS_CAST31]], [[SUB_PTR_RHS_CAST32]] +// CHECK1-NEXT: [[SUB_PTR_DIV34:%.*]] = sdiv exact i64 [[SUB_PTR_SUB33]], 8 +// CHECK1-NEXT: [[SUB35:%.*]] = sub nsw i64 [[SUB_PTR_DIV34]], 1 +// CHECK1-NEXT: [[ADD36:%.*]] = add nsw i64 [[SUB35]], 1 +// CHECK1-NEXT: [[DIV37:%.*]] = sdiv i64 [[ADD36]], 1 +// CHECK1-NEXT: [[SUB38:%.*]] = sub nsw i64 [[DIV37]], 1 +// CHECK1-NEXT: store i64 [[SUB38]], ptr [[DOTCAPTURE_EXPR_30]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8 +// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_30]], align 8 +// CHECK1-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP21]], 1 +// CHECK1-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: [[CMP41:%.*]] = icmp sgt i64 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP41]], label %[[COND_TRUE42:.*]], label %[[COND_FALSE43:.*]] +// CHECK1: [[COND_TRUE42]]: +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK1-NEXT: br label %[[COND_END44:.*]] +// CHECK1: [[COND_FALSE43]]: +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: br label %[[COND_END44]] +// CHECK1: [[COND_END44]]: +// CHECK1-NEXT: [[COND45:%.*]] = phi i64 [ [[TMP25]], %[[COND_TRUE42]] ], [ [[TMP26]], %[[COND_FALSE43]] ] +// CHECK1-NEXT: store i64 [[COND45]], ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: [[CMP47:%.*]] = icmp sgt i64 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: br i1 [[CMP47]], label %[[COND_TRUE48:.*]], label %[[COND_FALSE49:.*]] +// CHECK1: [[COND_TRUE48]]: +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK1-NEXT: br label %[[COND_END50:.*]] +// CHECK1: [[COND_FALSE49]]: +// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: br label %[[COND_END50]] +// CHECK1: [[COND_END50]]: +// CHECK1-NEXT: [[COND51:%.*]] = phi i64 [ [[TMP29]], %[[COND_TRUE48]] ], [ [[TMP30]], %[[COND_FALSE49]] ] +// CHECK1-NEXT: store i64 [[COND51]], ptr [[DOTOMP_FUSE_MAX46]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX46]], align 8 +// CHECK1-NEXT: [[CMP53:%.*]] = icmp slt i64 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: br i1 [[CMP53]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: br i1 [[CMP54]], label %[[IF_THEN:.*]], label %[[IF_END74:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4 +// CHECK1-NEXT: [[CONV55:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4 +// CHECK1-NEXT: 
[[CONV56:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV56]], [[TMP37]] +// CHECK1-NEXT: [[ADD57:%.*]] = add nsw i64 [[CONV55]], [[MUL]] +// CHECK1-NEXT: [[CONV58:%.*]] = trunc i64 [[ADD57]] to i32 +// CHECK1-NEXT: store i32 [[CONV58]], ptr [[DOTOMP_IV06]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4 +// CHECK1-NEXT: [[MUL59:%.*]] = mul nsw i32 [[TMP38]], 1 +// CHECK1-NEXT: [[ADD60:%.*]] = add nsw i32 0, [[MUL59]] +// CHECK1-NEXT: store i32 [[ADD60]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP61:%.*]] = icmp slt i32 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: br i1 [[CMP61]], label %[[IF_THEN62:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN62]]: +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL63:%.*]] = mul nsw i32 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: [[ADD64:%.*]] = add nsw i32 [[TMP41]], [[MUL63]] +// CHECK1-NEXT: store i32 [[ADD64]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP44]], 1 +// CHECK1-NEXT: [[ADD66:%.*]] = add nsw i32 0, [[MUL65]] +// CHECK1-NEXT: store i32 [[ADD66]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP45]]) +// CHECK1-NEXT: br label %[[IF_END]] +// CHECK1: [[IF_END]]: +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP67:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]] +// CHECK1-NEXT: br i1 [[CMP67]], label %[[IF_THEN68:.*]], label %[[IF_END73:.*]] +// CHECK1: [[IF_THEN68]]: +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL69:%.*]] = mul nsw i32 [[TMP49]], [[TMP50]] +// CHECK1-NEXT: [[ADD70:%.*]] = add nsw i32 [[TMP48]], [[MUL69]] +// CHECK1-NEXT: store i32 [[ADD70]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP51]], 2 +// CHECK1-NEXT: [[ADD72:%.*]] = add nsw i32 0, [[MUL71]] +// CHECK1-NEXT: store i32 [[ADD72]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP52]]) +// CHECK1-NEXT: br label %[[IF_END73]] +// CHECK1: [[IF_END73]]: +// CHECK1-NEXT: br label %[[IF_END74]] +// CHECK1: [[IF_END74]]: +// CHECK1-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: [[CMP75:%.*]] = icmp slt i64 [[TMP53]], [[TMP54]] +// CHECK1-NEXT: br i1 [[CMP75]], label %[[IF_THEN76:.*]], label %[[IF_END81:.*]] +// CHECK1: [[IF_THEN76]]: +// CHECK1-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[MUL77:%.*]] = mul nsw i64 [[TMP56]], [[TMP57]] +// CHECK1-NEXT: [[ADD78:%.*]] = add nsw i64 [[TMP55]], [[MUL77]] +// CHECK1-NEXT: store i64 [[ADD78]], ptr [[DOTOMP_IV120]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8 +// CHECK1-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], 1 +// CHECK1-NEXT: [[ADD_PTR80:%.*]] = getelementptr inbounds double, ptr [[TMP58]], i64 [[MUL79]] +// CHECK1-NEXT: store ptr [[ADD_PTR80]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: store ptr [[TMP60]], ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[C]], align 4 +// CHECK1-NEXT: [[TMP62:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = load double, ptr [[TMP62]], align 8 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP61]], double noundef [[TMP63]]) +// CHECK1-NEXT: br label %[[IF_END81]] +// CHECK1: [[IF_END81]]: +// CHECK1-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: [[CMP82:%.*]] = icmp slt i64 [[TMP64]], [[TMP65]] +// CHECK1-NEXT: br i1 [[CMP82]], label %[[IF_THEN83:.*]], label %[[IF_END88:.*]] +// CHECK1: [[IF_THEN83]]: +// CHECK1-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[MUL84:%.*]] = mul nsw i64 [[TMP67]], [[TMP68]] +// CHECK1-NEXT: [[ADD85:%.*]] = add nsw i64 [[TMP66]], [[MUL84]] +// CHECK1-NEXT: store i64 [[ADD85]], ptr [[DOTOMP_IV2]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8 +// CHECK1-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], 1 +// CHECK1-NEXT: [[ADD_PTR87:%.*]] = getelementptr inbounds double, ptr [[TMP69]], i64 [[MUL86]] +// CHECK1-NEXT: store ptr [[ADD_PTR87]], ptr [[__BEGIN225]], align 8 +// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[__BEGIN225]], align 8 +// CHECK1-NEXT: store ptr [[TMP71]], ptr [[VV]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = load i32, ptr [[CC]], align 4 +// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[VV]], align 8 +// CHECK1-NEXT: [[TMP74:%.*]] = load double, ptr [[TMP73]], align 8 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP72]], double noundef [[TMP74]]) +// CHECK1-NEXT: br label %[[IF_END88]] +// CHECK1: [[IF_END88]]: +// CHECK1-NEXT: br label %[[FOR_INC:.*]] +// CHECK1: [[FOR_INC]]: +// CHECK1-NEXT: [[TMP75:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i64 [[TMP75]], 1 +// CHECK1-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK1: [[FOR_END]]: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @foo4( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[K]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 128 +// CHECK1-NEXT: br i1 [[CMP1]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP6]]) +// CHECK1-NEXT: br label %[[FOR_INC:.*]] +// CHECK1: [[FOR_INC]]: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK1: [[FOR_END]]: +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND2:.*]] +// CHECK1: [[FOR_COND2]]: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END17:.*]] +// CHECK1: [[FOR_BODY4]]: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP5]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 2 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP16]]) +// CHECK1-NEXT: br label %[[IF_END]] +// CHECK1: [[IF_END]]: +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP8]], label %[[IF_THEN9:.*]], label %[[IF_END14:.*]] +// CHECK1: [[IF_THEN9]]: +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[MUL10]] +// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] +// CHECK1-NEXT: store i32 [[ADD13]], ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP23]]) +// CHECK1-NEXT: br label %[[IF_END14]] +// CHECK1: [[IF_END14]]: +// CHECK1-NEXT: br label %[[FOR_INC15:.*]] +// CHECK1: [[FOR_INC15]]: +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[INC16:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[INC16]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK1: [[FOR_END17]]: +// CHECK1-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP25]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY18:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP26]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY18]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND19:.*]] +// CHECK1: [[FOR_COND19]]: +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK1-NEXT: [[CMP20:%.*]] = icmp ne ptr [[TMP27]], [[TMP28]] +// CHECK1-NEXT: br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END23:.*]] +// CHECK1: [[FOR_BODY21]]: +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: store ptr [[TMP29]], ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[C]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load double, ptr [[TMP31]], align 8 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP30]], double noundef [[TMP32]]) +// CHECK1-NEXT: br label %[[FOR_INC22:.*]] +// CHECK1: [[FOR_INC22]]: +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP33]], i32 1 +// CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND19]] +// CHECK1: [[FOR_END23]]: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @foo5( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_TEMP_121:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX22:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX29:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[CC:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE264:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN265:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END267:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VV:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[K]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: store i32 512, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 +// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1 +// CHECK1-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1 +// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1 +// CHECK1-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: store i64 0, ptr 
[[DOTOMP_LB116]], align 8 +// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1 +// CHECK1-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: [[CMP23:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP23]], label %[[COND_TRUE24:.*]], label %[[COND_FALSE25:.*]] +// CHECK1: [[COND_TRUE24]]: +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK1-NEXT: br label %[[COND_END26:.*]] +// CHECK1: [[COND_FALSE25]]: +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: br label %[[COND_END26]] +// CHECK1: [[COND_END26]]: +// CHECK1-NEXT: [[COND27:%.*]] = phi i64 [ [[TMP18]], %[[COND_TRUE24]] ], [ [[TMP19]], %[[COND_FALSE25]] ] +// CHECK1-NEXT: store i64 [[COND27]], ptr [[DOTOMP_FUSE_MAX22]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[CMP28:%.*]] = icmp slt i32 [[TMP20]], 128 +// CHECK1-NEXT: br i1 [[CMP28]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP21]]) +// CHECK1-NEXT: br label %[[FOR_INC:.*]] +// CHECK1: [[FOR_INC]]: +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK1: [[FOR_END]]: +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND30:.*]] +// CHECK1: [[FOR_COND30]]: +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX22]], align 8 +// CHECK1-NEXT: [[CMP31:%.*]] = icmp slt i64 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP31]], label %[[FOR_BODY32:.*]], label %[[FOR_END63:.*]] +// CHECK1: [[FOR_BODY32]]: +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: [[CMP33:%.*]] = icmp slt i64 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: br i1 [[CMP33]], label %[[IF_THEN:.*]], label %[[IF_END53:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4 +// CHECK1-NEXT: [[CONV34:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4 +// CHECK1-NEXT: [[CONV35:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV35]], [[TMP29]] +// CHECK1-NEXT: [[ADD36:%.*]] = add nsw i64 [[CONV34]], [[MUL]] +// CHECK1-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 +// CHECK1-NEXT: store i32 [[CONV37]], ptr [[DOTOMP_IV06]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4 +// CHECK1-NEXT: 
[[MUL38:%.*]] = mul nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: [[ADD39:%.*]] = add nsw i32 0, [[MUL38]] +// CHECK1-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP40:%.*]] = icmp slt i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: br i1 [[CMP40]], label %[[IF_THEN41:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN41]]: +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL42:%.*]] = mul nsw i32 [[TMP34]], [[TMP35]] +// CHECK1-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP33]], [[MUL42]] +// CHECK1-NEXT: store i32 [[ADD43]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[MUL44:%.*]] = mul nsw i32 [[TMP36]], 2 +// CHECK1-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]] +// CHECK1-NEXT: store i32 [[ADD45]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP37]]) +// CHECK1-NEXT: br label %[[IF_END]] +// CHECK1: [[IF_END]]: +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP46:%.*]] = icmp slt i32 [[TMP38]], [[TMP39]] +// CHECK1-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]] +// CHECK1: [[IF_THEN47]]: +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL48:%.*]] = mul nsw i32 [[TMP41]], [[TMP42]] +// CHECK1-NEXT: [[ADD49:%.*]] = add nsw i32 [[TMP40]], [[MUL48]] +// CHECK1-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[MUL50:%.*]] = mul nsw i32 [[TMP43]], 1 +// CHECK1-NEXT: [[ADD51:%.*]] = add nsw i32 0, [[MUL50]] +// CHECK1-NEXT: store i32 [[ADD51]], ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP44]]) +// CHECK1-NEXT: br label %[[IF_END52]] +// CHECK1: [[IF_END52]]: +// CHECK1-NEXT: br label %[[IF_END53]] +// CHECK1: [[IF_END53]]: +// CHECK1-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP45]], [[TMP46]] +// CHECK1-NEXT: br i1 [[CMP54]], label %[[IF_THEN55:.*]], label %[[IF_END60:.*]] +// CHECK1: [[IF_THEN55]]: +// CHECK1-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[MUL56:%.*]] = mul nsw i64 [[TMP48]], [[TMP49]] +// CHECK1-NEXT: [[ADD57:%.*]] = add nsw i64 [[TMP47]], [[MUL56]] +// CHECK1-NEXT: store i64 [[ADD57]], ptr [[DOTOMP_IV120]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8 +// CHECK1-NEXT: [[MUL58:%.*]] = mul nsw i64 [[TMP51]], 1 +// CHECK1-NEXT: [[ADD_PTR59:%.*]] = getelementptr inbounds double, ptr [[TMP50]], i64 [[MUL58]] +// CHECK1-NEXT: store ptr [[ADD_PTR59]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: store ptr [[TMP52]], ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[C]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load double, ptr [[TMP54]], align 8 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP53]], double noundef [[TMP55]]) +// CHECK1-NEXT: br label %[[IF_END60]] +// CHECK1: [[IF_END60]]: +// CHECK1-NEXT: br label %[[FOR_INC61:.*]] +// CHECK1: [[FOR_INC61]]: +// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[INC62:%.*]] = add nsw i64 [[TMP56]], 1 +// CHECK1-NEXT: store i64 [[INC62]], ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND30]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1: [[FOR_END63]]: +// CHECK1-NEXT: store i32 37, ptr [[CC]], align 4 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE264]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = load ptr, ptr [[__RANGE264]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY66:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP57]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY66]], ptr [[__BEGIN265]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load ptr, ptr [[__RANGE264]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY68:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP58]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR69:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY68]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR69]], ptr [[__END267]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND70:.*]] +// CHECK1: [[FOR_COND70]]: +// CHECK1-NEXT: [[TMP59:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__END267]], align 8 +// CHECK1-NEXT: [[CMP71:%.*]] = icmp ne ptr [[TMP59]], [[TMP60]] +// CHECK1-NEXT: br i1 [[CMP71]], label %[[FOR_BODY72:.*]], label %[[FOR_END74:.*]] +// CHECK1: [[FOR_BODY72]]: +// CHECK1-NEXT: [[TMP61:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK1-NEXT: store ptr [[TMP61]], ptr [[VV]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[CC]], align 4 +// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[VV]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = load double, ptr 
[[TMP63]], align 8 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP62]], double noundef [[TMP64]]) +// CHECK1-NEXT: br label %[[FOR_INC73:.*]] +// CHECK1: [[FOR_INC73]]: +// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP65]], i32 1 +// CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN265]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND70]] +// CHECK1: [[FOR_END74]]: +// CHECK1-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @body( +// CHECK2-SAME: ...) #[[ATTR0:[0-9]+]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @foo1( +// CHECK2-SAME: i32 noundef [[START1:%.*]], i32 noundef [[END1:%.*]], i32 noundef [[STEP1:%.*]], i32 noundef [[START2:%.*]], i32 noundef [[END2:%.*]], i32 noundef [[STEP2:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: [[START1_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[END1_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[STEP1_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[START2_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[END2_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[STEP2_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[START1]], ptr [[START1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[END1]], ptr [[END1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[STEP1]], ptr [[STEP1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[START2]], ptr [[START2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[END2]], ptr [[END2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[STEP2]], ptr [[STEP2_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[START1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[START1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[END1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1 +// CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[END2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]] +// CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]] +// CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 +// CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1 +// CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP19]], [[TMP20]] +// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK2: [[COND_TRUE]]: +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: br label %[[COND_END:.*]] +// CHECK2: [[COND_FALSE]]: +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: br label %[[COND_END]] +// CHECK2: [[COND_END]]: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], %[[COND_TRUE]] ], [ [[TMP22]], %[[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND:.*]] +// CHECK2: [[FOR_COND]]: +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr 
[[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP23]], [[TMP24]] +// CHECK2-NEXT: br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2: [[FOR_BODY]]: +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK2: [[IF_THEN]]: +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP28]], [[TMP29]] +// CHECK2-NEXT: [[ADD18:%.*]] = add i32 [[TMP27]], [[MUL]] +// CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[MUL19:%.*]] = mul i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: [[ADD20:%.*]] = add i32 [[TMP30]], [[MUL19]] +// CHECK2-NEXT: store i32 [[ADD20]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP33]]) +// CHECK2-NEXT: br label %[[IF_END]] +// CHECK2: [[IF_END]]: +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP34]], [[TMP35]] +// CHECK2-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]] +// CHECK2: [[IF_THEN22]]: +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL23:%.*]] = mul i32 [[TMP37]], [[TMP38]] +// CHECK2-NEXT: [[ADD24:%.*]] = add i32 [[TMP36]], [[MUL23]] +// CHECK2-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[MUL25:%.*]] = mul i32 [[TMP40]], [[TMP41]] +// CHECK2-NEXT: [[ADD26:%.*]] = add i32 [[TMP39]], [[MUL25]] +// CHECK2-NEXT: store i32 [[ADD26]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP42]]) +// CHECK2-NEXT: br label %[[IF_END27]] +// CHECK2: [[IF_END27]]: +// CHECK2-NEXT: br label %[[FOR_INC:.*]] +// CHECK2: [[FOR_INC]]: +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP43]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK2: [[FOR_END]]: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @foo3( +// CHECK2-SAME: ) #[[ATTR0]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[CC:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__RANGE221:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END222:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN225:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_27:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_30:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_TEMP_140:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX46:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX52:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[VV:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// 
CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK2: [[COND_TRUE]]: +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: br label %[[COND_END:.*]] +// CHECK2: [[COND_FALSE]]: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: br label %[[COND_END]] +// CHECK2: [[COND_END]]: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 +// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8 +// CHECK2-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK2-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK2-NEXT: 
[[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK2-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1 +// CHECK2-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1 +// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1 +// CHECK2-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8 +// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1 +// CHECK2-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: store i32 37, ptr [[CC]], align 4 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE221]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY23:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP15]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR24:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY23]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR24]], ptr [[__END222]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY26:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP16]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY26]], ptr [[__BEGIN225]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY28]], ptr [[DOTCAPTURE_EXPR_27]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__END222]], align 8 +// CHECK2-NEXT: store ptr [[TMP18]], ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST31:%.*]] = ptrtoint ptr [[TMP19]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST32:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK2-NEXT: [[SUB_PTR_SUB33:%.*]] = sub i64 [[SUB_PTR_LHS_CAST31]], [[SUB_PTR_RHS_CAST32]] +// CHECK2-NEXT: [[SUB_PTR_DIV34:%.*]] = sdiv exact i64 [[SUB_PTR_SUB33]], 8 +// CHECK2-NEXT: [[SUB35:%.*]] = sub nsw i64 [[SUB_PTR_DIV34]], 1 +// CHECK2-NEXT: [[ADD36:%.*]] = add nsw i64 [[SUB35]], 1 +// CHECK2-NEXT: [[DIV37:%.*]] = sdiv i64 [[ADD36]], 1 +// CHECK2-NEXT: [[SUB38:%.*]] = sub nsw i64 [[DIV37]], 1 +// CHECK2-NEXT: store i64 [[SUB38]], ptr [[DOTCAPTURE_EXPR_30]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8 +// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_30]], align 8 +// CHECK2-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP21]], 1 +// CHECK2-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK2-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: [[CMP41:%.*]] = icmp sgt i64 [[TMP23]], [[TMP24]] +// CHECK2-NEXT: br i1 [[CMP41]], label %[[COND_TRUE42:.*]], label %[[COND_FALSE43:.*]] +// CHECK2: [[COND_TRUE42]]: +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 
8 +// CHECK2-NEXT: br label %[[COND_END44:.*]] +// CHECK2: [[COND_FALSE43]]: +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: br label %[[COND_END44]] +// CHECK2: [[COND_END44]]: +// CHECK2-NEXT: [[COND45:%.*]] = phi i64 [ [[TMP25]], %[[COND_TRUE42]] ], [ [[TMP26]], %[[COND_FALSE43]] ] +// CHECK2-NEXT: store i64 [[COND45]], ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: [[CMP47:%.*]] = icmp sgt i64 [[TMP27]], [[TMP28]] +// CHECK2-NEXT: br i1 [[CMP47]], label %[[COND_TRUE48:.*]], label %[[COND_FALSE49:.*]] +// CHECK2: [[COND_TRUE48]]: +// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK2-NEXT: br label %[[COND_END50:.*]] +// CHECK2: [[COND_FALSE49]]: +// CHECK2-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: br label %[[COND_END50]] +// CHECK2: [[COND_END50]]: +// CHECK2-NEXT: [[COND51:%.*]] = phi i64 [ [[TMP29]], %[[COND_TRUE48]] ], [ [[TMP30]], %[[COND_FALSE49]] ] +// CHECK2-NEXT: store i64 [[COND51]], ptr [[DOTOMP_FUSE_MAX46]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND:.*]] +// CHECK2: [[FOR_COND]]: +// CHECK2-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX46]], align 8 +// CHECK2-NEXT: [[CMP53:%.*]] = icmp slt i64 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: br i1 [[CMP53]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2: [[FOR_BODY]]: +// CHECK2-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK2-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP33]], [[TMP34]] +// CHECK2-NEXT: br i1 [[CMP54]], label %[[IF_THEN:.*]], label %[[IF_END74:.*]] +// CHECK2: [[IF_THEN]]: +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4 +// CHECK2-NEXT: [[CONV55:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4 +// CHECK2-NEXT: [[CONV56:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK2-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV56]], [[TMP37]] +// CHECK2-NEXT: [[ADD57:%.*]] = add nsw i64 [[CONV55]], [[MUL]] +// CHECK2-NEXT: [[CONV58:%.*]] = trunc i64 [[ADD57]] to i32 +// CHECK2-NEXT: store i32 [[CONV58]], ptr [[DOTOMP_IV06]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4 +// CHECK2-NEXT: [[MUL59:%.*]] = mul nsw i32 [[TMP38]], 1 +// CHECK2-NEXT: [[ADD60:%.*]] = add nsw i32 0, [[MUL59]] +// CHECK2-NEXT: store i32 [[ADD60]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP61:%.*]] = icmp slt i32 [[TMP39]], [[TMP40]] +// CHECK2-NEXT: br i1 [[CMP61]], label %[[IF_THEN62:.*]], label %[[IF_END:.*]] +// CHECK2: [[IF_THEN62]]: +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL63:%.*]] = mul nsw i32 [[TMP42]], [[TMP43]] +// CHECK2-NEXT: [[ADD64:%.*]] = add nsw i32 [[TMP41]], [[MUL63]] +// CHECK2-NEXT: store 
i32 [[ADD64]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP44]], 1 +// CHECK2-NEXT: [[ADD66:%.*]] = add nsw i32 0, [[MUL65]] +// CHECK2-NEXT: store i32 [[ADD66]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP45]]) +// CHECK2-NEXT: br label %[[IF_END]] +// CHECK2: [[IF_END]]: +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP67:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]] +// CHECK2-NEXT: br i1 [[CMP67]], label %[[IF_THEN68:.*]], label %[[IF_END73:.*]] +// CHECK2: [[IF_THEN68]]: +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL69:%.*]] = mul nsw i32 [[TMP49]], [[TMP50]] +// CHECK2-NEXT: [[ADD70:%.*]] = add nsw i32 [[TMP48]], [[MUL69]] +// CHECK2-NEXT: store i32 [[ADD70]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP51]], 2 +// CHECK2-NEXT: [[ADD72:%.*]] = add nsw i32 0, [[MUL71]] +// CHECK2-NEXT: store i32 [[ADD72]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP52]]) +// CHECK2-NEXT: br label %[[IF_END73]] +// CHECK2: [[IF_END73]]: +// CHECK2-NEXT: br label %[[IF_END74]] +// CHECK2: [[IF_END74]]: +// CHECK2-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: [[CMP75:%.*]] = icmp slt i64 [[TMP53]], [[TMP54]] +// CHECK2-NEXT: br i1 [[CMP75]], label %[[IF_THEN76:.*]], label %[[IF_END81:.*]] +// CHECK2: [[IF_THEN76]]: +// CHECK2-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8 +// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8 +// CHECK2-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[MUL77:%.*]] = mul nsw i64 [[TMP56]], [[TMP57]] +// CHECK2-NEXT: [[ADD78:%.*]] = add nsw i64 [[TMP55]], [[MUL77]] +// CHECK2-NEXT: store i64 [[ADD78]], ptr [[DOTOMP_IV120]], align 8 +// CHECK2-NEXT: [[TMP58:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8 +// CHECK2-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], 1 +// CHECK2-NEXT: [[ADD_PTR80:%.*]] = getelementptr inbounds double, ptr [[TMP58]], i64 [[MUL79]] +// CHECK2-NEXT: store ptr [[ADD_PTR80]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: store ptr [[TMP60]], ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[C]], align 4 +// CHECK2-NEXT: [[TMP62:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP63:%.*]] = load double, ptr [[TMP62]], align 8 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP61]], double noundef [[TMP63]]) +// CHECK2-NEXT: br label %[[IF_END81]] +// CHECK2: [[IF_END81]]: +// CHECK2-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: [[CMP82:%.*]] = icmp slt i64 [[TMP64]], [[TMP65]] +// CHECK2-NEXT: br i1 [[CMP82]], label %[[IF_THEN83:.*]], label %[[IF_END88:.*]] +// CHECK2: [[IF_THEN83]]: +// CHECK2-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8 +// CHECK2-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8 +// CHECK2-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[MUL84:%.*]] = mul nsw i64 [[TMP67]], [[TMP68]] +// CHECK2-NEXT: [[ADD85:%.*]] = add nsw i64 [[TMP66]], [[MUL84]] +// CHECK2-NEXT: store i64 [[ADD85]], ptr [[DOTOMP_IV2]], align 8 +// CHECK2-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8 +// CHECK2-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8 +// CHECK2-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], 1 +// CHECK2-NEXT: [[ADD_PTR87:%.*]] = getelementptr inbounds double, ptr [[TMP69]], i64 [[MUL86]] +// CHECK2-NEXT: store ptr [[ADD_PTR87]], ptr [[__BEGIN225]], align 8 +// CHECK2-NEXT: [[TMP71:%.*]] = load ptr, ptr [[__BEGIN225]], align 8 +// CHECK2-NEXT: store ptr [[TMP71]], ptr [[VV]], align 8 +// CHECK2-NEXT: [[TMP72:%.*]] = load i32, ptr [[CC]], align 4 +// CHECK2-NEXT: [[TMP73:%.*]] = load ptr, ptr [[VV]], align 8 +// CHECK2-NEXT: [[TMP74:%.*]] = load double, ptr [[TMP73]], align 8 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP72]], double noundef [[TMP74]]) +// CHECK2-NEXT: br label %[[IF_END88]] +// CHECK2: [[IF_END88]]: +// CHECK2-NEXT: br label %[[FOR_INC:.*]] +// CHECK2: [[FOR_INC]]: +// CHECK2-NEXT: [[TMP75:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i64 [[TMP75]], 1 +// CHECK2-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK2: [[FOR_END]]: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @foo4( +// CHECK2-SAME: ) #[[ATTR0]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: store i32 
128, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[K]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK2: [[COND_TRUE]]: +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: br label %[[COND_END:.*]] +// CHECK2: [[COND_FALSE]]: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: br label %[[COND_END]] +// CHECK2: [[COND_END]]: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND:.*]] +// CHECK2: [[FOR_COND]]: +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 128 +// CHECK2-NEXT: br i1 [[CMP1]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2: [[FOR_BODY]]: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP6]]) +// CHECK2-NEXT: br label %[[FOR_INC:.*]] +// CHECK2: [[FOR_INC]]: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK2: [[FOR_END]]: +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND2:.*]] +// CHECK2: [[FOR_COND2]]: +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]] +// CHECK2-NEXT: br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END17:.*]] +// CHECK2: [[FOR_BODY4]]: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: br i1 [[CMP5]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK2: [[IF_THEN]]: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], [[TMP14]] +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 2 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK2-NEXT: store i32 [[ADD7]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP16]]) +// CHECK2-NEXT: br label %[[IF_END]] +// CHECK2: [[IF_END]]: +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: br i1 [[CMP8]], label %[[IF_THEN9:.*]], label %[[IF_END14:.*]] +// CHECK2: [[IF_THEN9]]: +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[MUL10]] +// CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] +// CHECK2-NEXT: store i32 [[ADD13]], ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[K]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP23]]) +// CHECK2-NEXT: br label %[[IF_END14]] +// CHECK2: [[IF_END14]]: +// CHECK2-NEXT: br label %[[FOR_INC15:.*]] +// CHECK2: [[FOR_INC15]]: +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[INC16:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK2-NEXT: store i32 [[INC16]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK2: [[FOR_END17]]: +// CHECK2-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP25]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY18:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP26]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY18]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND19:.*]] +// CHECK2: [[FOR_COND19]]: +// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK2-NEXT: [[CMP20:%.*]] = icmp ne ptr [[TMP27]], [[TMP28]] +// CHECK2-NEXT: br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END23:.*]] +// CHECK2: [[FOR_BODY21]]: +// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: store ptr [[TMP29]], ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[C]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = load double, ptr [[TMP31]], align 8 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP30]], double noundef [[TMP32]]) +// CHECK2-NEXT: br label %[[FOR_INC22:.*]] +// CHECK2: [[FOR_INC22]]: +// CHECK2-NEXT: [[TMP33:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP33]], i32 1 +// CHECK2-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND19]] +// CHECK2: [[FOR_END23]]: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @foo5( +// CHECK2-SAME: ) #[[ATTR0]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_TEMP_121:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX22:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX29:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[CC:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__RANGE264:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN265:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END267:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[VV:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[K]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: store i32 512, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK2: [[COND_TRUE]]: +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: br label %[[COND_END:.*]] +// CHECK2: [[COND_FALSE]]: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: br label %[[COND_END]] +// CHECK2: [[COND_END]]: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 +// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8 +// CHECK2-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK2-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK2-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1 +// CHECK2-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1 +// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1 +// CHECK2-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK2-NEXT: store i64 0, ptr 
[[DOTOMP_LB116]], align 8 +// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1 +// CHECK2-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: [[CMP23:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]] +// CHECK2-NEXT: br i1 [[CMP23]], label %[[COND_TRUE24:.*]], label %[[COND_FALSE25:.*]] +// CHECK2: [[COND_TRUE24]]: +// CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK2-NEXT: br label %[[COND_END26:.*]] +// CHECK2: [[COND_FALSE25]]: +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: br label %[[COND_END26]] +// CHECK2: [[COND_END26]]: +// CHECK2-NEXT: [[COND27:%.*]] = phi i64 [ [[TMP18]], %[[COND_TRUE24]] ], [ [[TMP19]], %[[COND_FALSE25]] ] +// CHECK2-NEXT: store i64 [[COND27]], ptr [[DOTOMP_FUSE_MAX22]], align 8 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND:.*]] +// CHECK2: [[FOR_COND]]: +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[CMP28:%.*]] = icmp slt i32 [[TMP20]], 128 +// CHECK2-NEXT: br i1 [[CMP28]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2: [[FOR_BODY]]: +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP21]]) +// CHECK2-NEXT: br label %[[FOR_INC:.*]] +// CHECK2: [[FOR_INC]]: +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK2: [[FOR_END]]: +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND30:.*]] +// CHECK2: [[FOR_COND30]]: +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX22]], align 8 +// CHECK2-NEXT: [[CMP31:%.*]] = icmp slt i64 [[TMP23]], [[TMP24]] +// CHECK2-NEXT: br i1 [[CMP31]], label %[[FOR_BODY32:.*]], label %[[FOR_END63:.*]] +// CHECK2: [[FOR_BODY32]]: +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK2-NEXT: [[CMP33:%.*]] = icmp slt i64 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: br i1 [[CMP33]], label %[[IF_THEN:.*]], label %[[IF_END53:.*]] +// CHECK2: [[IF_THEN]]: +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4 +// CHECK2-NEXT: [[CONV34:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4 +// CHECK2-NEXT: [[CONV35:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV35]], [[TMP29]] +// CHECK2-NEXT: [[ADD36:%.*]] = add nsw i64 [[CONV34]], [[MUL]] +// CHECK2-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 +// CHECK2-NEXT: store i32 [[CONV37]], ptr [[DOTOMP_IV06]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4 +// CHECK2-NEXT: 
[[MUL38:%.*]] = mul nsw i32 [[TMP30]], 1 +// CHECK2-NEXT: [[ADD39:%.*]] = add nsw i32 0, [[MUL38]] +// CHECK2-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP40:%.*]] = icmp slt i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: br i1 [[CMP40]], label %[[IF_THEN41:.*]], label %[[IF_END:.*]] +// CHECK2: [[IF_THEN41]]: +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL42:%.*]] = mul nsw i32 [[TMP34]], [[TMP35]] +// CHECK2-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP33]], [[MUL42]] +// CHECK2-NEXT: store i32 [[ADD43]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[MUL44:%.*]] = mul nsw i32 [[TMP36]], 2 +// CHECK2-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]] +// CHECK2-NEXT: store i32 [[ADD45]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP37]]) +// CHECK2-NEXT: br label %[[IF_END]] +// CHECK2: [[IF_END]]: +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP46:%.*]] = icmp slt i32 [[TMP38]], [[TMP39]] +// CHECK2-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]] +// CHECK2: [[IF_THEN47]]: +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL48:%.*]] = mul nsw i32 [[TMP41]], [[TMP42]] +// CHECK2-NEXT: [[ADD49:%.*]] = add nsw i32 [[TMP40]], [[MUL48]] +// CHECK2-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[MUL50:%.*]] = mul nsw i32 [[TMP43]], 1 +// CHECK2-NEXT: [[ADD51:%.*]] = add nsw i32 0, [[MUL50]] +// CHECK2-NEXT: store i32 [[ADD51]], ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[K]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP44]]) +// CHECK2-NEXT: br label %[[IF_END52]] +// CHECK2: [[IF_END52]]: +// CHECK2-NEXT: br label %[[IF_END53]] +// CHECK2: [[IF_END53]]: +// CHECK2-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP45]], [[TMP46]] +// CHECK2-NEXT: br i1 [[CMP54]], label %[[IF_THEN55:.*]], label %[[IF_END60:.*]] +// CHECK2: [[IF_THEN55]]: +// CHECK2-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8 +// CHECK2-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8 +// CHECK2-NEXT: [[TMP49:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[MUL56:%.*]] = mul nsw i64 [[TMP48]], [[TMP49]] +// CHECK2-NEXT: [[ADD57:%.*]] = add nsw i64 [[TMP47]], [[MUL56]] +// CHECK2-NEXT: store i64 [[ADD57]], ptr [[DOTOMP_IV120]], align 8 +// CHECK2-NEXT: [[TMP50:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[TMP51:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8 +// CHECK2-NEXT: [[MUL58:%.*]] = mul nsw i64 [[TMP51]], 1 +// CHECK2-NEXT: [[ADD_PTR59:%.*]] = getelementptr inbounds double, ptr [[TMP50]], i64 [[MUL58]] +// CHECK2-NEXT: store ptr [[ADD_PTR59]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP52:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: store ptr [[TMP52]], ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[C]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP55:%.*]] = load double, ptr [[TMP54]], align 8 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP53]], double noundef [[TMP55]]) +// CHECK2-NEXT: br label %[[IF_END60]] +// CHECK2: [[IF_END60]]: +// CHECK2-NEXT: br label %[[FOR_INC61:.*]] +// CHECK2: [[FOR_INC61]]: +// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[INC62:%.*]] = add nsw i64 [[TMP56]], 1 +// CHECK2-NEXT: store i64 [[INC62]], ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND30]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK2: [[FOR_END63]]: +// CHECK2-NEXT: store i32 37, ptr [[CC]], align 4 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE264]], align 8 +// CHECK2-NEXT: [[TMP57:%.*]] = load ptr, ptr [[__RANGE264]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY66:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP57]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY66]], ptr [[__BEGIN265]], align 8 +// CHECK2-NEXT: [[TMP58:%.*]] = load ptr, ptr [[__RANGE264]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY68:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP58]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR69:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY68]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR69]], ptr [[__END267]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND70:.*]] +// CHECK2: [[FOR_COND70]]: +// CHECK2-NEXT: [[TMP59:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK2-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__END267]], align 8 +// CHECK2-NEXT: [[CMP71:%.*]] = icmp ne ptr [[TMP59]], [[TMP60]] +// CHECK2-NEXT: br i1 [[CMP71]], label %[[FOR_BODY72:.*]], label %[[FOR_END74:.*]] +// CHECK2: [[FOR_BODY72]]: +// CHECK2-NEXT: [[TMP61:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK2-NEXT: store ptr [[TMP61]], ptr [[VV]], align 8 +// CHECK2-NEXT: [[TMP62:%.*]] = load i32, ptr [[CC]], align 4 +// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[VV]], align 8 +// CHECK2-NEXT: [[TMP64:%.*]] = load double, ptr 
[[TMP63]], align 8 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP62]], double noundef [[TMP64]]) +// CHECK2-NEXT: br label %[[FOR_INC73:.*]] +// CHECK2: [[FOR_INC73]]: +// CHECK2-NEXT: [[TMP65:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK2-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP65]], i32 1 +// CHECK2-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN265]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND70]] +// CHECK2: [[FOR_END74]]: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @tfoo2( +// CHECK2-SAME: ) #[[ATTR0]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: call void @_Z4foo2IiEvT_S0_S0_(i32 noundef 0, i32 noundef 64, i32 noundef 4) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define linkonce_odr void @_Z4foo2IiEvT_S0_S0_( +// CHECK2-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] comdat { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_17:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTNEW_STEP21:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 
[[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1 +// CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]] +// CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]] +// CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 +// CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1 +// CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: store i32 [[ADD16]], ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[STEP_ADDR]], 
align 4 +// CHECK2-NEXT: store i32 [[TMP24]], ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK2-NEXT: [[SUB23:%.*]] = sub i32 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP27]] +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP28]] +// CHECK2-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1 +// CHECK2-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK2-NEXT: [[ADD28:%.*]] = add i32 [[TMP29]], 1 +// CHECK2-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP30]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK2: [[COND_TRUE]]: +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: br label %[[COND_END:.*]] +// CHECK2: [[COND_FALSE]]: +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: br label %[[COND_END]] +// CHECK2: [[COND_END]]: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP33]], %[[COND_TRUE]] ], [ [[TMP34]], %[[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP35]], [[TMP36]] +// CHECK2-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]] +// CHECK2: [[COND_TRUE30]]: +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK2-NEXT: br label %[[COND_END32:.*]] +// CHECK2: [[COND_FALSE31]]: +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: br label %[[COND_END32]] +// CHECK2: [[COND_END32]]: +// CHECK2-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP37]], %[[COND_TRUE30]] ], [ [[TMP38]], %[[COND_FALSE31]] ] +// CHECK2-NEXT: store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND:.*]] +// CHECK2: [[FOR_COND]]: +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP39]], [[TMP40]] +// CHECK2-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2: [[FOR_BODY]]: +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP41]], [[TMP42]] +// CHECK2-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK2: 
[[IF_THEN]]: +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP44]], [[TMP45]] +// CHECK2-NEXT: [[ADD36:%.*]] = add i32 [[TMP43]], [[MUL]] +// CHECK2-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[MUL37:%.*]] = mul i32 [[TMP47]], [[TMP48]] +// CHECK2-NEXT: [[ADD38:%.*]] = add i32 [[TMP46]], [[MUL37]] +// CHECK2-NEXT: store i32 [[ADD38]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP49]]) +// CHECK2-NEXT: br label %[[IF_END]] +// CHECK2: [[IF_END]]: +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP50]], [[TMP51]] +// CHECK2-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]] +// CHECK2: [[IF_THEN40]]: +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL41:%.*]] = mul i32 [[TMP53]], [[TMP54]] +// CHECK2-NEXT: [[ADD42:%.*]] = add i32 [[TMP52]], [[MUL41]] +// CHECK2-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[MUL43:%.*]] = mul i32 [[TMP56]], [[TMP57]] +// CHECK2-NEXT: [[SUB44:%.*]] = sub i32 [[TMP55]], [[MUL43]] +// CHECK2-NEXT: store i32 [[SUB44]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP58:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP58]]) +// CHECK2-NEXT: br label %[[IF_END45]] +// CHECK2: [[IF_END45]]: +// CHECK2-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP59]], [[TMP60]] +// CHECK2-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]] +// CHECK2: [[IF_THEN47]]: +// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4 +// CHECK2-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4 +// CHECK2-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL48:%.*]] = mul i32 [[TMP62]], [[TMP63]] +// CHECK2-NEXT: [[ADD49:%.*]] = add i32 [[TMP61]], [[MUL48]] +// CHECK2-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4 +// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK2-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4 +// CHECK2-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[MUL50:%.*]] = mul i32 [[TMP65]], [[TMP66]] +// CHECK2-NEXT: [[ADD51:%.*]] = add i32 [[TMP64]], [[MUL50]] +// CHECK2-NEXT: store i32 [[ADD51]], ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP67:%.*]] = load i32, ptr [[K]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP67]]) +// CHECK2-NEXT: br label %[[IF_END52]] +// CHECK2: [[IF_END52]]: +// CHECK2-NEXT: br label %[[FOR_INC:.*]] +// CHECK2: [[FOR_INC]]: +// CHECK2-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP68]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2: [[FOR_END]]: +// CHECK2-NEXT: ret void +// +//. +// CHECK1: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} +// CHECK1: [[META4]] = !{!"llvm.loop.mustprogress"} +// CHECK1: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]} +// CHECK1: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} +// CHECK1: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]} +// CHECK1: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]} +// CHECK1: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]} +// CHECK1: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]} +//. +// CHECK2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} +// CHECK2: [[META4]] = !{!"llvm.loop.mustprogress"} +// CHECK2: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]} +// CHECK2: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} +// CHECK2: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]} +// CHECK2: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]} +// CHECK2: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]} +// CHECK2: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]} +//. 
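For readers following the CHECK2 lines above: they verify the lowering of a '#pragma omp fuse' region whose bounds (start, end, step) are runtime values. The generated code precomputes the trip count of each loop in the sequence, runs a single fused loop up to the maximum of those trip counts (.omp.fuse.max), and guards each original body so it stops contributing once its own trip count (.omp.ni0/.omp.ni1/.omp.ni2) is exhausted. Below is a minimal sketch of that shape in plain C; the loop shapes and the 'body' callee are inferred from the CHECK lines, not copied from the test source, and the lowered form is hand-written for illustration rather than the exact IR Clang emits.

// The callee and loop shapes are inferred from the CHECK lines (illustrative).
void body(int v);

// Source shape exercised by the CHECK2 run line (runtime start/end/step):
void fuse_runtime_bounds(int start, int end, int step) {
#pragma omp fuse
  {
    for (int i = start; i < end; i += step)
      body(i);
    for (int j = end; j > start; j -= step)
      body(j);
    for (int k = start + step; k < end + step; k += step)
      body(k);
  }
}

// Mirrors the trip-count computation in the IR: ((ub - lb) - 1 + step) udiv step.
static unsigned trip_count(int lb, int ub, int step) {
  return ((unsigned)(ub - lb) - 1u + (unsigned)step) / (unsigned)step;
}

// Hand-written sketch of the fused form the CHECK lines verify: one loop up to
// the maximum trip count, with each original body guarded by its own trip count.
void fuse_lowered_sketch(int start, int end, int step) {
  unsigned n0 = trip_count(start, end, step);
  unsigned n1 = trip_count(start, end, step);               // loop 1 covers the same range, counting down
  unsigned n2 = trip_count(start + step, end + step, step);
  unsigned max = n0 > n1 ? n0 : n1;                         // .omp.fuse.max = max(n0, n1, n2)
  if (n2 > max)
    max = n2;
  for (unsigned idx = 0; idx < max; ++idx) {                // .omp.fuse.index
    if (idx < n0)
      body(start + (int)idx * step);                        // guarded body of loop 0
    if (idx < n1)
      body(end - (int)idx * step);                          // guarded body of loop 1
    if (idx < n2)
      body((start + step) + (int)idx * step);               // guarded body of loop 2
  }
}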
diff --git a/clang/test/OpenMP/fuse_messages.cpp b/clang/test/OpenMP/fuse_messages.cpp new file mode 100644 index 0000000..b86ce95 --- /dev/null +++ b/clang/test/OpenMP/fuse_messages.cpp @@ -0,0 +1,209 @@ +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++20 -fopenmp -fopenmp-version=60 -fsyntax-only -Wuninitialized -verify %s + +void func() { + + // expected-error@+2 {{statement after '#pragma omp fuse' must be a loop sequence containing canonical loops or loop-generating constructs}} + #pragma omp fuse + ; + + // expected-error@+2 {{statement after '#pragma omp fuse' must be a for loop}} + #pragma omp fuse + {int bar = 0;} + + // expected-error@+4 {{statement after '#pragma omp fuse' must be a for loop}} + #pragma omp fuse + { + for(int i = 0; i < 10; ++i); + int x = 2; + } + + // expected-error@+2 {{statement after '#pragma omp fuse' must be a loop sequence containing canonical loops or loop-generating constructs}} + #pragma omp fuse + #pragma omp for + for (int i = 0; i < 7; ++i) + ; + + { + // expected-error@+2 {{expected statement}} + #pragma omp fuse + } + + // expected-warning@+1 {{extra tokens at the end of '#pragma omp fuse' are ignored}} + #pragma omp fuse foo + { + for (int i = 0; i < 7; ++i) + ; + for(int j = 0; j < 100; ++j); + + } + + + // expected-error@+1 {{unexpected OpenMP clause 'final' in directive '#pragma omp fuse'}} + #pragma omp fuse final(0) + { + for (int i = 0; i < 7; ++i) + ; + for(int j = 0; j < 100; ++j); + + } + + //expected-error@+3 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'i'}} + #pragma omp fuse + { + for(int i = 0; i < 10; i*=2) { + ; + } + for(int j = 0; j < 100; ++j); + } + + //expected-error@+2 {{loop sequence after '#pragma omp fuse' must contain at least 1 canonical loop or loop-generating construct}} + #pragma omp fuse + {} + + //expected-error@+3 {{statement after '#pragma omp fuse' must be a for loop}} + #pragma omp fuse + { + #pragma omp unroll full + for(int i = 0; i < 10; ++i); + + for(int j = 0; j < 10; ++j); + } + + //expected-warning@+2 {{looprange clause selects a single loop, resulting in redundant fusion}} + #pragma omp fuse + { + for(int i = 0; i < 10; ++i); + } + + //expected-warning@+1 {{looprange clause selects a single loop, resulting in redundant fusion}} + #pragma omp fuse looprange(1, 1) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + } + + //expected-error@+1 {{argument to 'looprange' clause must be a strictly positive integer value}} + #pragma omp fuse looprange(1, -1) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + } + + //expected-error@+1 {{argument to 'looprange' clause must be a strictly positive integer value}} + #pragma omp fuse looprange(1, 0) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + } + + const int x = 1; + constexpr int y = 4; + //expected-error@+1 {{looprange clause selects loops from 1 to 4 but this exceeds the number of loops (3) in the loop sequence}} + #pragma omp fuse looprange(x,y) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } + + //expected-error@+1 {{looprange clause selects loops from 1 to 420 but this exceeds the number of loops (3) in the loop sequence}} + #pragma omp fuse looprange(1,420) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } + + //expected-error@+1 {{looprange clause selects loops from 1 to 6 but this exceeds the number of loops (5) in the loop sequence}} 
+ #pragma omp fuse looprange(1,6) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + // This fusion results in 2 loops + #pragma omp fuse looprange(1,2) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } + } + + //expected-error@+1 {{looprange clause selects loops from 2 to 4 but this exceeds the number of loops (3) in the loop sequence}} + #pragma omp fuse looprange(2,3) + { + #pragma omp unroll partial(2) + for(int i = 0; i < 10; ++i); + + #pragma omp reverse + for(int j = 0; j < 10; ++j); + + #pragma omp fuse + { + { + #pragma omp reverse + for(int j = 0; j < 10; ++j); + } + for(int k = 0; k < 50; ++k); + } + } +} + +// In a template context, but expression itself not instantiation-dependent +template <typename T> +static void templated_func() { + + //expected-warning@+1 {{looprange clause selects a single loop, resulting in redundant fusion}} + #pragma omp fuse looprange(2,1) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } + + //expected-error@+1 {{looprange clause selects loops from 3 to 5 but this exceeds the number of loops (3) in the loop sequence}} + #pragma omp fuse looprange(3,3) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } + +} + +template <int V> +static void templated_func_value_dependent() { + + //expected-warning@+1 {{looprange clause selects a single loop, resulting in redundant fusion}} + #pragma omp fuse looprange(V,1) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } +} + +template <typename T> +static void templated_func_type_dependent() { + constexpr T s = 1; + + //expected-error@+1 {{argument to 'looprange' clause must be a strictly positive integer value}} + #pragma omp fuse looprange(s,s-1) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } +} + + +void template_inst() { + // expected-note@+1 {{in instantiation of function template specialization 'templated_func<int>' requested here}} + templated_func<int>(); + // expected-note@+1 {{in instantiation of function template specialization 'templated_func_value_dependent<1>' requested here}} + templated_func_value_dependent<1>(); + // expected-note@+1 {{in instantiation of function template specialization 'templated_func_type_dependent<int>' requested here}} + templated_func_type_dependent<int>(); +} + + diff --git a/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp b/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp index 7ffb7aae..8c7a778 100644 --- a/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp +++ b/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp @@ -1,9 +1,7 @@ -// RUN: %clang_cc1 -std=c++20 %s -verify=cxx20 -// RUN: %clang_cc1 -std=c++23 %s -verify=cxx23 -// RUN: %clang_cc1 -std=c++23 -Wpre-c++23-compat %s -verify=precxx23 -// RUN: %clang_cc1 -std=c++23 -pedantic %s -verify=cxx23 - -//cxx23-no-diagnostics +// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++20 %s -verify=cxx20 +// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++23 %s -verify=cxx23 +// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++23 -Wpre-c++23-compat %s -verify=precxx23 +// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++23 -pedantic %s -verify=cxx23 auto L1 = [] constexpr {}; // cxx20-warning@-1 {{lambda without a parameter 
clause is a C++23 extension}} @@ -14,3 +12,25 @@ auto L3 = [] static {}; // cxx20-warning@-1 {{lambda without a parameter clause is a C++23 extension}} // cxx20-warning@-2 {{static lambdas are a C++23 extension}} // precxx23-warning@-3 {{static lambdas are incompatible with C++ standards before C++23}} + +namespace GH161070 { +void t1() { int a = [] __arm_streaming; } +// precxx23-error@-1 {{'__arm_streaming' cannot be applied to a declaration}} +// precxx23-error@-2 {{expected body of lambda expression}} +// cxx23-error@-3 {{'__arm_streaming' cannot be applied to a declaration}} +// cxx23-error@-4 {{expected body of lambda expression}} +// cxx20-error@-5 {{'__arm_streaming' cannot be applied to a declaration}} +// cxx20-error@-6 {{expected body of lambda expression}} +// cxx20-warning@-7 {{'__arm_streaming' in this position is a C++23 extension}} +// precxx23-warning@-8 {{'__arm_streaming' in this position is incompatible with C++ standards before C++23}} + +void t2() { int a = [] [[assume(true)]]; } +// precxx23-error@-1 {{'assume' attribute cannot be applied to a declaration}} +// precxx23-error@-2 {{expected body of lambda expression}} +// cxx23-error@-3 {{'assume' attribute cannot be applied to a declaration}} +// cxx23-error@-4 {{expected body of lambda expression}} +// cxx20-error@-5 {{'assume' attribute cannot be applied to a declaration}} +// cxx20-error@-6 {{expected body of lambda expression}} +// cxx20-warning@-7 {{an attribute specifier sequence in this position is a C++23 extension}} +// precxx23-warning@-8 {{an attribute specifier sequence in this position is incompatible with C++ standards before C++23}} +} diff --git a/clang/test/SemaCUDA/vararg.cu b/clang/test/SemaCUDA/vararg.cu index 34ef367..0238f42 100644 --- a/clang/test/SemaCUDA/vararg.cu +++ b/clang/test/SemaCUDA/vararg.cu @@ -10,7 +10,7 @@ #include <stdarg.h> #include "Inputs/cuda.h" -__device__ void foo() { +__global__ void foo() { va_list list; va_arg(list, int); #ifdef EXPECT_VA_ARG_ERR diff --git a/clang/test/SemaCXX/amdgpu-image-rsrc.cpp b/clang/test/SemaCXX/amdgpu-image-rsrc.cpp new file mode 100644 index 0000000..61a82d4 --- /dev/null +++ b/clang/test/SemaCXX/amdgpu-image-rsrc.cpp @@ -0,0 +1,17 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -fsyntax-only -verify -std=gnu++11 -triple amdgcn -Wno-unused-value %s + +void foo() { + int n = 100; + __amdgpu_texture_t v = 0; // expected-error {{cannot initialize a variable of type '__amdgpu_texture_t' with an rvalue of type 'int'}} + static_cast<__amdgpu_texture_t>(n); // expected-error {{static_cast from 'int' to '__amdgpu_texture_t' is not allowed}} + reinterpret_cast<__amdgpu_texture_t>(n); // expected-error {{reinterpret_cast from 'int' to '__amdgpu_texture_t' is not allowed}} + (void)(v + v); // expected-error {{invalid operands to binary expression ('__amdgpu_texture_t' and '__amdgpu_texture_t')}} + int x(v); // expected-error {{cannot initialize a variable of type 'int' with an lvalue of type '__amdgpu_texture_t'}} + __amdgpu_texture_t k; +} + +template<class T> void bar(T); +void use(__amdgpu_texture_t r) { bar(r); } +struct S { __amdgpu_texture_t r; int a; }; diff --git a/clang/test/SemaCXX/bitfield-layout.cpp b/clang/test/SemaCXX/bitfield-layout.cpp index 7efd1d3..f30218b 100644 --- a/clang/test/SemaCXX/bitfield-layout.cpp +++ b/clang/test/SemaCXX/bitfield-layout.cpp @@ -35,7 +35,7 @@ CHECK_SIZE(Test4, 8); CHECK_ALIGN(Test4, 8); struct Test5 
{ - char c : 0x100000001; // expected-warning {{width of bit-field 'c' (4294967297 bits) exceeds the width of its type; value will be truncated to 8 bits}} + char c : 0x100000001; // expected-warning {{width of bit-field 'c' (4'294'967'297 bits) exceeds the width of its type; value will be truncated to 8 bits}} }; // Size and align don't really matter here, just make sure we don't crash. CHECK_SIZE(Test5, 1); diff --git a/clang/test/SemaCXX/decltype.cpp b/clang/test/SemaCXX/decltype.cpp index 739485b..45a4c4c 100644 --- a/clang/test/SemaCXX/decltype.cpp +++ b/clang/test/SemaCXX/decltype.cpp @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -Wno-c99-designator %s +// RUN: %clang_cc1 -std=c++17 -fsyntax-only -verify -Wno-c99-designator %s // PR5290 int const f0(); @@ -156,6 +157,8 @@ struct A { } }; + + // This shouldn't crash. static_assert(A<int>().f<int>() == 0, ""); // The result should not be dependent. @@ -163,6 +166,81 @@ static_assert(A<int>().f<int>() != 0, ""); // expected-error {{static assertion // expected-note@-1 {{expression evaluates to '0 != 0'}} } + +#if __cplusplus >= 201703L +namespace GH160497 { + +template <class> struct S { + template <class> + inline static auto mem = + [] { static_assert(false); // expected-error {{static assertion failed}} \ + // expected-note {{while substituting into a lambda expression here}} + return 42; + }(); +}; + +using T = decltype(S<void>::mem<void>); + // expected-note@-1 {{in instantiation of static data member 'GH160497::S<void>::mem<void>' requested here}} + + +template <class> struct S2 { + template <class> + inline static auto* mem = + [] { static_assert(false); // expected-error {{static assertion failed}} \ + // expected-note {{while substituting into a lambda expression here}} + return static_cast<int*>(nullptr); + }(); +}; + +using T2 = decltype(S2<void>::mem<void>); +//expected-note@-1 {{in instantiation of static data member 'GH160497::S2<void>::mem<void>' requested here}} + +template <class> struct S3 { + template <class> + inline static int mem = // Check we don't instantiate when the type is not deduced. 
+ [] { static_assert(false); + return 42; + }(); +}; + +using T = decltype(S3<void>::mem<void>); +} + +namespace N1 { + +template<class> +struct S { + template<class> + inline static auto mem = 42; +}; + +using T = decltype(S<void>::mem<void>); + +T y = 42; + +} + +namespace GH161196 { + +template <typename> struct A { + static constexpr int digits = 0; +}; + +template <typename> struct B { + template <int, typename MaskInt = int, int = A<MaskInt>::digits> + static constexpr auto XBitMask = 0; +}; + +struct C { + using ReferenceHost = B<int>; + template <int> static decltype(ReferenceHost::XBitMask<0>) XBitMask; +}; + +void test() { (void)C::XBitMask<0>; } + +} +#endif + template<typename> class conditional { }; diff --git a/clang/test/SemaCXX/invalid-requirement-requires-expr.cpp b/clang/test/SemaCXX/invalid-requirement-requires-expr.cpp index 097ada3..436dfb9 100644 --- a/clang/test/SemaCXX/invalid-requirement-requires-expr.cpp +++ b/clang/test/SemaCXX/invalid-requirement-requires-expr.cpp @@ -17,8 +17,7 @@ constexpr bool A<x>::far() { b.data_member; requires A<x-1>::far(); // #Invalid // expected-error@#Invalid {{recursive template instantiation exceeded maximum depth}} - // expected-note@#Invalid {{in instantiation}} - // expected-note@#Invalid 2 {{while}} + // expected-note@#Invalid 3 {{while}} // expected-note@#Invalid {{contexts in backtrace}} // expected-note@#Invalid {{increase recursive template instantiation depth}} }; diff --git a/clang/test/SemaCXX/type-traits.cpp b/clang/test/SemaCXX/type-traits.cpp index 3f01247..d49330f 100644 --- a/clang/test/SemaCXX/type-traits.cpp +++ b/clang/test/SemaCXX/type-traits.cpp @@ -2038,6 +2038,49 @@ void is_implicit_lifetime(int n) { static_assert(__builtin_is_implicit_lifetime(int * __restrict)); } +namespace GH160610 { +class NonAggregate { +public: + NonAggregate() = default; + + NonAggregate(const NonAggregate&) = delete; + NonAggregate& operator=(const NonAggregate&) = delete; +private: + int num; +}; + +class DataMemberInitializer { +public: + DataMemberInitializer() = default; + + DataMemberInitializer(const DataMemberInitializer&) = delete; + DataMemberInitializer& operator=(const DataMemberInitializer&) = delete; +private: + int num = 0; +}; + +class UserProvidedConstructor { +public: + UserProvidedConstructor() {} + + UserProvidedConstructor(const UserProvidedConstructor&) = delete; + UserProvidedConstructor& operator=(const UserProvidedConstructor&) = delete; +}; + +static_assert(__builtin_is_implicit_lifetime(NonAggregate)); +static_assert(!__builtin_is_implicit_lifetime(DataMemberInitializer)); +static_assert(!__builtin_is_implicit_lifetime(UserProvidedConstructor)); + +#if __cplusplus >= 202002L +template <typename T> +class Tpl { + Tpl() requires false = default ; +}; +static_assert(!__builtin_is_implicit_lifetime(Tpl<int>)); + +#endif +} + void is_signed() { //static_assert(__is_signed(char)); diff --git a/clang/test/SemaOpenCL/amdgpu-image-rsrc.cl b/clang/test/SemaOpenCL/amdgpu-image-rsrc.cl new file mode 100644 index 0000000..dc56494 --- /dev/null +++ b/clang/test/SemaOpenCL/amdgpu-image-rsrc.cl @@ -0,0 +1,13 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -verify -cl-std=CL1.2 -triple amdgcn-amd-amdhsa %s +// RUN: %clang_cc1 -verify -cl-std=CL2.0 -triple amdgcn-amd-amdhsa %s + +void f() { + int n = 3; + __amdgpu_texture_t v = (__amdgpu_texture_t)0; // expected-error {{used type '__amdgpu_texture_t' where 
arithmetic or pointer type is required}} + int k = v; // expected-error {{initializing '__private int' with an expression of incompatible type '__private __amdgpu_texture_t'}} + (void)(v + v); // expected-error {{invalid operands}} + __amdgpu_texture_t r; + int *p = (int*)r; // expected-error {{operand of type '__amdgpu_texture_t' where arithmetic or pointer type is required}} +} diff --git a/clang/test/SemaOpenMP/amdgpu-image-rsrc.cpp b/clang/test/SemaOpenMP/amdgpu-image-rsrc.cpp new file mode 100644 index 0000000..51b3f72 --- /dev/null +++ b/clang/test/SemaOpenMP/amdgpu-image-rsrc.cpp @@ -0,0 +1,12 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -triple amdgcn-amd-amdhsa -fopenmp-is-target-device -Wno-unused-value %s + +void foo() { +#pragma omp target + { + int n = 5; + __amdgpu_texture_t v = 0; // expected-error {{cannot initialize a variable of type '__amdgpu_texture_t' with an rvalue of type 'int'}} + (void)(v + v); // expected-error {{invalid operands to binary expression}} + } +} diff --git a/clang/test/SemaTemplate/instantiation-depth-subst-2.cpp b/clang/test/SemaTemplate/instantiation-depth-subst-2.cpp index 2b519e9..66fd1af 100644 --- a/clang/test/SemaTemplate/instantiation-depth-subst-2.cpp +++ b/clang/test/SemaTemplate/instantiation-depth-subst-2.cpp @@ -2,5 +2,6 @@ template<int N> struct S { }; template<typename T> S<T() + T()> operator+(T, T); // expected-error {{instantiation exceeded maximum depth}} expected-note 2{{while substituting}} +// expected-note@-1 {{use -ftemplate-depth=N to increase recursive template instantiation depth}} S<0> s; int k = s + s; // expected-note {{while substituting}} diff --git a/clang/test/SemaTemplate/instantiation-depth-subst.cpp b/clang/test/SemaTemplate/instantiation-depth-subst.cpp index 062a8ed..17944bc 100644 --- a/clang/test/SemaTemplate/instantiation-depth-subst.cpp +++ b/clang/test/SemaTemplate/instantiation-depth-subst.cpp @@ -3,7 +3,8 @@ // PR9793 template<typename T> auto f(T t) -> decltype(f(t)); // \ // expected-error {{recursive template instantiation exceeded maximum depth of 2}} \ -// expected-note 2 {{while substituting}} +// expected-note 2 {{while substituting}} \ +// expected-note {{use -ftemplate-depth=N to increase recursive template instantiation depth}} struct S {}; int k = f(S{}); // expected-note {{while substituting}} diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp index 0e2758d..e41f4eb 100644 --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -420,7 +420,7 @@ public: std::vector<ModuleDeps *> NewMDs; { std::unique_lock<std::mutex> ul(Lock); - for (const ModuleDeps &MD : Graph) { + for (ModuleDeps &MD : Graph) { auto I = Modules.find({MD.ID, 0}); if (I != Modules.end()) { I->first.InputIndex = std::min(I->first.InputIndex, InputIndex); diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 5aab743..30e2be7 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2148,6 +2148,9 @@ public: void VisitOMPUnrollDirective(const OMPUnrollDirective *D); void VisitOMPReverseDirective(const OMPReverseDirective *D); void VisitOMPInterchangeDirective(const OMPInterchangeDirective *D); + void VisitOMPCanonicalLoopSequenceTransformationDirective( + const 
OMPCanonicalLoopSequenceTransformationDirective *D); + void VisitOMPFuseDirective(const OMPFuseDirective *D); void VisitOMPForDirective(const OMPForDirective *D); void VisitOMPForSimdDirective(const OMPForSimdDirective *D); void VisitOMPSectionsDirective(const OMPSectionsDirective *D); @@ -2353,6 +2356,11 @@ void OMPClauseEnqueue::VisitOMPPartialClause(const OMPPartialClause *C) { Visitor->AddStmt(C->getFactor()); } +void OMPClauseEnqueue::VisitOMPLoopRangeClause(const OMPLoopRangeClause *C) { + Visitor->AddStmt(C->getFirst()); + Visitor->AddStmt(C->getCount()); +} + void OMPClauseEnqueue::VisitOMPAllocatorClause(const OMPAllocatorClause *C) { Visitor->AddStmt(C->getAllocator()); } @@ -3317,6 +3325,15 @@ void EnqueueVisitor::VisitOMPInterchangeDirective( VisitOMPCanonicalLoopNestTransformationDirective(D); } +void EnqueueVisitor::VisitOMPCanonicalLoopSequenceTransformationDirective( + const OMPCanonicalLoopSequenceTransformationDirective *D) { + VisitOMPExecutableDirective(D); +} + +void EnqueueVisitor::VisitOMPFuseDirective(const OMPFuseDirective *D) { + VisitOMPCanonicalLoopSequenceTransformationDirective(D); +} + void EnqueueVisitor::VisitOMPForDirective(const OMPForDirective *D) { VisitOMPLoopDirective(D); } @@ -6275,6 +6292,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) { return cxstring::createRef("OMPReverseDirective"); case CXCursor_OMPInterchangeDirective: return cxstring::createRef("OMPInterchangeDirective"); + case CXCursor_OMPFuseDirective: + return cxstring::createRef("OMPFuseDirective"); case CXCursor_OMPForDirective: return cxstring::createRef("OMPForDirective"); case CXCursor_OMPForSimdDirective: diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp index 3c40624..56f113c 100644 --- a/clang/tools/libclang/CXCursor.cpp +++ b/clang/tools/libclang/CXCursor.cpp @@ -687,6 +687,9 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent, case Stmt::OMPInterchangeDirectiveClass: K = CXCursor_OMPInterchangeDirective; break; + case Stmt::OMPFuseDirectiveClass: + K = CXCursor_OMPFuseDirective; + break; case Stmt::OMPForDirectiveClass: K = CXCursor_OMPForDirective; break; diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 6a3385a..fef7036 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -1364,6 +1364,27 @@ TEST_F(FormatTest, FormatIfWithoutCompoundStatementButElseWith) { AllowsMergedIf); } +TEST_F(FormatTest, WrapMultipleStatementIfAndElseBraces) { + auto Style = getLLVMStyle(); + Style.AllowShortBlocksOnASingleLine = FormatStyle::SBS_Always; + Style.AllowShortIfStatementsOnASingleLine = FormatStyle::SIS_AllIfsAndElse; + Style.BreakBeforeBraces = FormatStyle::BS_Custom; + Style.BraceWrapping.AfterControlStatement = FormatStyle::BWACS_Always; + Style.BraceWrapping.BeforeElse = true; + + verifyFormat("if (x)\n" + "{\n" + " ++x;\n" + " --y;\n" + "}\n" + "else\n" + "{\n" + " --x;\n" + " ++y;\n" + "}", + Style); +} + TEST_F(FormatTest, FormatLoopsWithoutCompoundStatement) { verifyFormat("while (true)\n" " ;"); |
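The CIndex.cpp and CXCursor.cpp hunks above are what make the new directive visible to libclang clients: a '#pragma omp fuse' statement is mapped to CXCursor_OMPFuseDirective and spelled "OMPFuseDirective". The sketch below shows one way a client could observe this; it assumes a libclang built with this patch, uses only the stable C API (clang_parseTranslationUnit, clang_visitChildren, clang_getCursorKindSpelling), and the file name test.c and command-line flags are placeholders.

#include <clang-c/Index.h>
#include <stdio.h>

// Print the kind spelling of every cursor in a translation unit. With the
// changes above, a '#pragma omp fuse' statement prints "OMPFuseDirective".
static enum CXChildVisitResult printKind(CXCursor cursor, CXCursor parent,
                                         CXClientData data) {
  (void)parent;
  (void)data;
  CXString spelling = clang_getCursorKindSpelling(clang_getCursorKind(cursor));
  printf("%s\n", clang_getCString(spelling));
  clang_disposeString(spelling);
  return CXChildVisit_Recurse;
}

int main(void) {
  const char *args[] = {"-fopenmp", "-fopenmp-version=60"};
  CXIndex index = clang_createIndex(/*excludeDeclarationsFromPCH=*/0,
                                    /*displayDiagnostics=*/1);
  CXTranslationUnit tu = clang_parseTranslationUnit(
      index, "test.c", args, 2, /*unsaved_files=*/NULL, 0,
      CXTranslationUnit_None);
  if (tu) {
    clang_visitChildren(clang_getTranslationUnitCursor(tu), printKind, NULL);
    clang_disposeTranslationUnit(tu);
  }
  clang_disposeIndex(index);
  return 0;
}

Linking is the usual -lclang against the freshly built library; paths depend on the local build.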