Diffstat (limited to 'clang')
-rw-r--r--  clang/bindings/python/clang/cindex.py | 3
-rw-r--r--  clang/docs/OpenMPSupport.rst | 2
-rw-r--r--  clang/docs/ReleaseNotes.rst | 4
-rw-r--r--  clang/include/clang-c/Index.h | 4
-rw-r--r--  clang/include/clang/AST/OpenMPClause.h | 74
-rw-r--r--  clang/include/clang/AST/RecursiveASTVisitor.h | 11
-rw-r--r--  clang/include/clang/AST/StmtOpenMP.h | 196
-rw-r--r--  clang/include/clang/Basic/AMDGPUTypes.def | 1
-rw-r--r--  clang/include/clang/Basic/Builtins.def | 1
-rw-r--r--  clang/include/clang/Basic/Diagnostic.h | 17
-rw-r--r--  clang/include/clang/Basic/DiagnosticParseKinds.td | 2
-rw-r--r--  clang/include/clang/Basic/DiagnosticSemaKinds.td | 18
-rw-r--r--  clang/include/clang/Basic/OpenMPKinds.h | 7
-rw-r--r--  clang/include/clang/Basic/StmtNodes.td | 4
-rw-r--r--  clang/include/clang/Basic/TargetInfo.h | 4
-rw-r--r--  clang/include/clang/CIR/Dialect/IR/CIROps.td | 39
-rw-r--r--  clang/include/clang/CIR/MissingFeatures.h | 2
-rw-r--r--  clang/include/clang/Parse/Parser.h | 3
-rw-r--r--  clang/include/clang/Sema/Sema.h | 4
-rw-r--r--  clang/include/clang/Sema/SemaOpenMP.h | 89
-rw-r--r--  clang/include/clang/Serialization/ASTBitCodes.h | 1
-rw-r--r--  clang/lib/AST/ASTContext.cpp | 4
-rw-r--r--  clang/lib/AST/ByteCode/Compiler.cpp | 18
-rw-r--r--  clang/lib/AST/ByteCode/Compiler.h | 1
-rw-r--r--  clang/lib/AST/ByteCode/Context.cpp | 15
-rw-r--r--  clang/lib/AST/ByteCode/Interp.cpp | 2
-rw-r--r--  clang/lib/AST/ByteCode/Interp.h | 15
-rw-r--r--  clang/lib/AST/ByteCode/InterpBuiltin.cpp | 145
-rw-r--r--  clang/lib/AST/ByteCode/InterpFrame.cpp | 2
-rw-r--r--  clang/lib/AST/ByteCode/Opcodes.td | 1
-rw-r--r--  clang/lib/AST/ByteCode/Pointer.h | 1
-rw-r--r--  clang/lib/AST/OpenMPClause.cpp | 35
-rw-r--r--  clang/lib/AST/StmtOpenMP.cpp | 73
-rw-r--r--  clang/lib/AST/StmtPrinter.cpp | 5
-rw-r--r--  clang/lib/AST/StmtProfile.cpp | 16
-rw-r--r--  clang/lib/Basic/OpenMPKinds.cpp | 11
-rw-r--r--  clang/lib/Basic/TargetInfo.cpp | 49
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenClass.cpp | 42
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp | 215
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp | 11
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenFunction.h | 5
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenModule.cpp | 13
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenStmt.cpp | 1
-rw-r--r--  clang/lib/CIR/Dialect/IR/CIRDialect.cpp | 128
-rw-r--r--  clang/lib/CodeGen/BackendUtil.cpp | 5
-rw-r--r--  clang/lib/CodeGen/CGDebugInfo.cpp | 64
-rw-r--r--  clang/lib/CodeGen/CGDebugInfo.h | 1
-rw-r--r--  clang/lib/CodeGen/CGStmt.cpp | 61
-rw-r--r--  clang/lib/CodeGen/CGStmtOpenMP.cpp | 42
-rw-r--r--  clang/lib/CodeGen/CodeGenFunction.h | 1
-rw-r--r--  clang/lib/CodeGen/CodeGenPGO.cpp | 4
-rw-r--r--  clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 27
-rw-r--r--  clang/lib/Driver/Driver.cpp | 3
-rw-r--r--  clang/lib/Format/FormatToken.h | 2
-rw-r--r--  clang/lib/Format/TokenAnnotator.cpp | 49
-rw-r--r--  clang/lib/Format/UnwrappedLineParser.cpp | 25
-rw-r--r--  clang/lib/Frontend/InitPreprocessor.cpp | 5
-rw-r--r--  clang/lib/Frontend/ModuleDependencyCollector.cpp | 11
-rw-r--r--  clang/lib/Headers/avxintrin.h | 23
-rw-r--r--  clang/lib/Parse/ParseExprCXX.cpp | 2
-rw-r--r--  clang/lib/Parse/ParseOpenMP.cpp | 36
-rw-r--r--  clang/lib/Sema/AnalysisBasedWarnings.cpp | 7
-rw-r--r--  clang/lib/Sema/SemaConcept.cpp | 2
-rw-r--r--  clang/lib/Sema/SemaDecl.cpp | 3
-rw-r--r--  clang/lib/Sema/SemaExceptionSpec.cpp | 1
-rw-r--r--  clang/lib/Sema/SemaExpr.cpp | 10
-rw-r--r--  clang/lib/Sema/SemaOpenACCAtomic.cpp | 7
-rw-r--r--  clang/lib/Sema/SemaOpenMP.cpp | 833
-rw-r--r--  clang/lib/Sema/SemaTemplate.cpp | 2
-rw-r--r--  clang/lib/Sema/SemaTemplateInstantiate.cpp | 88
-rw-r--r--  clang/lib/Sema/SemaTypeTraits.cpp | 17
-rw-r--r--  clang/lib/Sema/TreeTransform.h | 44
-rw-r--r--  clang/lib/Serialization/ASTReader.cpp | 11
-rw-r--r--  clang/lib/Serialization/ASTReaderStmt.cpp | 17
-rw-r--r--  clang/lib/Serialization/ASTWriter.cpp | 8
-rw-r--r--  clang/lib/Serialization/ASTWriterStmt.cpp | 12
-rw-r--r--  clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp | 4
-rw-r--r--  clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 1
-rw-r--r--  clang/test/AST/ByteCode/cxx23.cpp | 61
-rw-r--r--  clang/test/AST/ByteCode/invalid.cpp | 2
-rw-r--r--  clang/test/Analysis/buffer-overlap-decls.c | 23
-rw-r--r--  clang/test/Analysis/buffer-overlap.c | 7
-rw-r--r--  clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-file-diagnostics.c.sarif | 2
-rw-r--r--  clang/test/Analysis/lit.local.cfg | 4
-rw-r--r--  clang/test/CIR/CodeGen/complex.cpp | 37
-rw-r--r--  clang/test/CIR/CodeGen/delete.cpp | 88
-rw-r--r--  clang/test/CIR/CodeGen/lang-c-cpp.cpp | 4
-rw-r--r--  clang/test/CIR/CodeGen/module-filename.cpp | 11
-rw-r--r--  clang/test/CIR/CodeGen/opt-info-attr.cpp | 12
-rw-r--r--  clang/test/CIR/CodeGen/vbase.cpp | 82
-rw-r--r--  clang/test/CIR/CodeGen/vector-ext.cpp | 20
-rw-r--r--  clang/test/CIR/CodeGen/vector.cpp | 20
-rw-r--r--  clang/test/CIR/IR/global-init.cir | 48
-rw-r--r--  clang/test/CodeGen/X86/avx-builtins.c | 3
-rw-r--r--  clang/test/CodeGen/X86/bmi-builtins.c | 99
-rw-r--r--  clang/test/CodeGen/X86/bmi2-builtins.c | 5
-rw-r--r--  clang/test/CodeGen/X86/tbm-builtins.c | 83
-rw-r--r--  clang/test/CodeGen/amdgpu-image-rsrc-type-debug-info.c | 17
-rw-r--r--  clang/test/CodeGenCXX/amdgpu-image-rsrc-typeinfo.cpp | 7
-rw-r--r--  clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp | 423
-rw-r--r--  clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp | 103
-rw-r--r--  clang/test/CodeGenCXX/gh56652.cpp | 41
-rw-r--r--  clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl | 21
-rw-r--r--  clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl | 28
-rw-r--r--  clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 3
-rw-r--r--  clang/test/Driver/modules-print-library-module-manifest-path.cpp | 8
-rw-r--r--  clang/test/Lexer/cxx-features.cpp | 2
-rw-r--r--  clang/test/OpenMP/for_reduction_codegen.cpp | 32
-rw-r--r--  clang/test/OpenMP/fuse_ast_print.cpp | 397
-rw-r--r--  clang/test/OpenMP/fuse_codegen.cpp | 2328
-rw-r--r--  clang/test/OpenMP/fuse_messages.cpp | 209
-rw-r--r--  clang/test/Parser/cxx2b-lambdas-ext-warns.cpp | 32
-rw-r--r--  clang/test/SemaCUDA/vararg.cu | 2
-rw-r--r--  clang/test/SemaCXX/amdgpu-image-rsrc.cpp | 17
-rw-r--r--  clang/test/SemaCXX/bitfield-layout.cpp | 2
-rw-r--r--  clang/test/SemaCXX/decltype.cpp | 78
-rw-r--r--  clang/test/SemaCXX/invalid-requirement-requires-expr.cpp | 3
-rw-r--r--  clang/test/SemaCXX/type-traits.cpp | 43
-rw-r--r--  clang/test/SemaOpenCL/amdgpu-image-rsrc.cl | 13
-rw-r--r--  clang/test/SemaOpenMP/amdgpu-image-rsrc.cpp | 12
-rw-r--r--  clang/test/SemaTemplate/instantiation-depth-subst-2.cpp | 1
-rw-r--r--  clang/test/SemaTemplate/instantiation-depth-subst.cpp | 3
-rw-r--r--  clang/tools/clang-scan-deps/ClangScanDeps.cpp | 2
-rw-r--r--  clang/tools/libclang/CIndex.cpp | 19
-rw-r--r--  clang/tools/libclang/CXCursor.cpp | 3
-rw-r--r--  clang/unittests/Format/FormatTest.cpp | 21
-rw-r--r--  clang/unittests/Format/TokenAnnotatorTest.cpp | 14
127 files changed, 6654 insertions, 563 deletions
diff --git a/clang/bindings/python/clang/cindex.py b/clang/bindings/python/clang/cindex.py
index c44e646..80140d2 100644
--- a/clang/bindings/python/clang/cindex.py
+++ b/clang/bindings/python/clang/cindex.py
@@ -1446,6 +1446,9 @@ class CursorKind(BaseEnumeration):
# OpenMP stripe directive.
OMP_STRIPE_DIRECTIVE = 310
+ # OpenMP fuse directive.
+ OMP_FUSE_DIRECTIVE = 311
+
# OpenACC Compute Construct.
OPEN_ACC_COMPUTE_DIRECTIVE = 320
diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst
index 6108e54..68ca7be 100644
--- a/clang/docs/OpenMPSupport.rst
+++ b/clang/docs/OpenMPSupport.rst
@@ -482,6 +482,8 @@ implementation.
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| loop transformation apply clause | :none:`unclaimed` | :none:`unclaimed` | |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| loop fuse transformation | :good:`done` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| workdistribute construct | | :none:`in progress` | @skc7, @mjklemm |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| task_iteration | :none:`unclaimed` | :none:`unclaimed` | |
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 98c889c..79dc0b2 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -432,6 +432,9 @@ Bug Fixes to C++ Support
- Fix an assertion failure when taking the address on a non-type template parameter argument of
object type. (#GH151531)
- Suppress ``-Wdouble-promotion`` when explicitly asked for with C++ list initialization (#GH33409).
+- Fix the result of `__builtin_is_implicit_lifetime` for types with a user-provided constructor. (#GH160610)
+- Correctly deduce return types in ``decltype`` expressions. (#GH160497) (#GH56652) (#GH116319) (#GH161196)
+- Fixed a crash in the pre-C++23 warning for attributes before a lambda declarator (#GH161070).
Bug Fixes to AST Handling
^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -599,6 +602,7 @@ OpenMP Support
- Added support for ``defaultmap`` directive implicit-behavior ``storage``.
- Added support for ``defaultmap`` directive implicit-behavior ``private``.
- Added parsing and semantic analysis support for ``groupprivate`` directive.
+- Added support for 'omp fuse' directive.
Improvements
^^^^^^^^^^^^
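
For reference (not part of the patch): a minimal sketch of the new directive in user code, assuming OpenMP 6.0 'fuse' semantics in which the canonical loop sequence associated with the directive is merged into a single loop.

    void saxpy_pair(int n, float a, const float *x, float *y, float *z) {
      // Both loops iterate over the same range; '#pragma omp fuse' fuses them
      // so each iteration of the generated loop executes both statements.
      #pragma omp fuse
      {
        for (int i = 0; i < n; ++i)
          y[i] += a * x[i];
        for (int i = 0; i < n; ++i)
          z[i] += a * x[i];
      }
    }
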
diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h
index be038d9..f13d9c9 100644
--- a/clang/include/clang-c/Index.h
+++ b/clang/include/clang-c/Index.h
@@ -2162,6 +2162,10 @@ enum CXCursorKind {
*/
CXCursor_OMPStripeDirective = 310,
+ /** OpenMP fuse directive
+ */
+ CXCursor_OMPFuseDirective = 311,
+
/** OpenACC Compute Construct.
*/
CXCursor_OpenACCComputeConstruct = 320,
diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
index 42b4268..68d220a 100644
--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -1149,6 +1149,80 @@ public:
static OMPFullClause *CreateEmpty(const ASTContext &C);
};
+/// This class represents the 'looprange' clause in the
+/// '#pragma omp fuse' directive
+///
+/// \code {c}
+/// #pragma omp fuse looprange(1,2)
+/// {
+/// for(int i = 0; i < 64; ++i)
+/// for(int j = 0; j < 256; j+=2)
+/// for(int k = 127; k >= 0; --k)
+/// }
+/// \endcode
+class OMPLoopRangeClause final : public OMPClause {
+ friend class OMPClauseReader;
+ /// Location of '('
+ SourceLocation LParenLoc;
+
+ /// Location of first and count expressions
+ SourceLocation FirstLoc, CountLoc;
+
+ /// Number of looprange arguments (always 2: first, count)
+ enum { FirstExpr, CountExpr, NumArgs };
+ Stmt *Args[NumArgs] = {nullptr, nullptr};
+
+ /// Set looprange 'first' expression
+ void setFirst(Expr *E) { Args[FirstExpr] = E; }
+
+ /// Set looprange 'count' expression
+ void setCount(Expr *E) { Args[CountExpr] = E; }
+
+ /// Build an empty clause for deserialization.
+ explicit OMPLoopRangeClause()
+ : OMPClause(llvm::omp::OMPC_looprange, {}, {}) {}
+
+public:
+ /// Build a 'looprange' clause AST node.
+ static OMPLoopRangeClause *
+ Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
+ SourceLocation FirstLoc, SourceLocation CountLoc,
+ SourceLocation EndLoc, Expr *First, Expr *Count);
+
+ /// Build an empty 'looprange' clause node.
+ static OMPLoopRangeClause *CreateEmpty(const ASTContext &C);
+
+ // Location getters/setters
+ SourceLocation getLParenLoc() const { return LParenLoc; }
+ SourceLocation getFirstLoc() const { return FirstLoc; }
+ SourceLocation getCountLoc() const { return CountLoc; }
+
+ void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+ void setFirstLoc(SourceLocation Loc) { FirstLoc = Loc; }
+ void setCountLoc(SourceLocation Loc) { CountLoc = Loc; }
+
+ /// Get looprange 'first' expression
+ Expr *getFirst() const { return cast_or_null<Expr>(Args[FirstExpr]); }
+
+ /// Get looprange 'count' expression
+ Expr *getCount() const { return cast_or_null<Expr>(Args[CountExpr]); }
+
+ child_range children() { return child_range(Args, Args + NumArgs); }
+ const_child_range children() const {
+ return const_child_range(Args, Args + NumArgs);
+ }
+
+ child_range used_children() {
+ return child_range(child_iterator(), child_iterator());
+ }
+ const_child_range used_children() const {
+ return const_child_range(const_child_iterator(), const_child_iterator());
+ }
+
+ static bool classof(const OMPClause *T) {
+ return T->getClauseKind() == llvm::omp::OMPC_looprange;
+ }
+};
+
/// Representation of the 'partial' clause of the '#pragma omp unroll'
/// directive.
///
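
Usage sketch (not part of the patch), extending the looprange example from the class comment above: with three loops in the sequence, looprange(1,2) selects the first two loops for fusion and leaves the third untouched. The body_* calls are placeholders.

    #pragma omp fuse looprange(1,2)
    {
      for (int i = 0; i < 64; ++i)      body_a(i); // fused
      for (int j = 0; j < 256; j += 2)  body_b(j); // fused with the loop above
      for (int k = 127; k >= 0; --k)    body_c(k); // kept as a separate loop
    }
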
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index af1a073..7a2881f 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -3177,6 +3177,9 @@ DEF_TRAVERSE_STMT(OMPUnrollDirective,
DEF_TRAVERSE_STMT(OMPReverseDirective,
{ TRY_TO(TraverseOMPExecutableDirective(S)); })
+DEF_TRAVERSE_STMT(OMPFuseDirective,
+ { TRY_TO(TraverseOMPExecutableDirective(S)); })
+
DEF_TRAVERSE_STMT(OMPInterchangeDirective,
{ TRY_TO(TraverseOMPExecutableDirective(S)); })
@@ -3495,6 +3498,14 @@ bool RecursiveASTVisitor<Derived>::VisitOMPFullClause(OMPFullClause *C) {
}
template <typename Derived>
+bool RecursiveASTVisitor<Derived>::VisitOMPLoopRangeClause(
+ OMPLoopRangeClause *C) {
+ TRY_TO(TraverseStmt(C->getFirst()));
+ TRY_TO(TraverseStmt(C->getCount()));
+ return true;
+}
+
+template <typename Derived>
bool RecursiveASTVisitor<Derived>::VisitOMPPartialClause(OMPPartialClause *C) {
TRY_TO(TraverseStmt(C->getFactor()));
return true;
diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index d9f87f1..bc6aeaa8 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -21,6 +21,7 @@
#include "clang/AST/StmtCXX.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceLocation.h"
+#include "llvm/Support/Casting.h"
namespace clang {
@@ -677,6 +678,10 @@ public:
}
};
+// Forward declaration of a generic loop transformation. Used in the declaration
+// of OMPLoopBasedDirective.
+class OMPLoopTransformationDirective;
+
/// The base class for all loop-based directives, including loop transformation
/// directives.
class OMPLoopBasedDirective : public OMPExecutableDirective {
@@ -889,24 +894,23 @@ public:
/// Calls the specified callback function for all the loops in \p CurStmt,
/// from the outermost to the innermost.
- static bool doForAllLoops(
- Stmt *CurStmt, bool TryImperfectlyNestedLoops, unsigned NumLoops,
- llvm::function_ref<bool(unsigned, Stmt *)> Callback,
- llvm::function_ref<void(OMPCanonicalLoopNestTransformationDirective *)>
- OnTransformationCallback);
+ static bool
+ doForAllLoops(Stmt *CurStmt, bool TryImperfectlyNestedLoops,
+ unsigned NumLoops,
+ llvm::function_ref<bool(unsigned, Stmt *)> Callback,
+ llvm::function_ref<void(OMPLoopTransformationDirective *)>
+ OnTransformationCallback);
static bool
doForAllLoops(const Stmt *CurStmt, bool TryImperfectlyNestedLoops,
unsigned NumLoops,
llvm::function_ref<bool(unsigned, const Stmt *)> Callback,
- llvm::function_ref<
- void(const OMPCanonicalLoopNestTransformationDirective *)>
+ llvm::function_ref<void(const OMPLoopTransformationDirective *)>
OnTransformationCallback) {
auto &&NewCallback = [Callback](unsigned Cnt, Stmt *CurStmt) {
return Callback(Cnt, CurStmt);
};
auto &&NewTransformCb =
- [OnTransformationCallback](
- OMPCanonicalLoopNestTransformationDirective *A) {
+ [OnTransformationCallback](OMPLoopTransformationDirective *A) {
OnTransformationCallback(A);
};
return doForAllLoops(const_cast<Stmt *>(CurStmt), TryImperfectlyNestedLoops,
@@ -919,7 +923,7 @@ public:
doForAllLoops(Stmt *CurStmt, bool TryImperfectlyNestedLoops,
unsigned NumLoops,
llvm::function_ref<bool(unsigned, Stmt *)> Callback) {
- auto &&TransformCb = [](OMPCanonicalLoopNestTransformationDirective *) {};
+ auto &&TransformCb = [](OMPLoopTransformationDirective *) {};
return doForAllLoops(CurStmt, TryImperfectlyNestedLoops, NumLoops, Callback,
TransformCb);
}
@@ -957,9 +961,11 @@ public:
};
/// Common class of data shared between
-/// OMPCanonicalLoopNestTransformationDirective and transformations over
-/// canonical loop sequences.
+/// OMPCanonicalLoopNestTransformationDirective and
+/// OMPCanonicalLoopSequenceTransformationDirective
class OMPLoopTransformationDirective {
+ friend class ASTStmtReader;
+
/// Number of (top-level) generated loops.
/// This value is 1 for most transformations as they only map one loop nest
/// into another.
@@ -969,15 +975,39 @@ class OMPLoopTransformationDirective {
/// generate more than one loop nest, so the value would be >= 1.
unsigned NumGeneratedTopLevelLoops = 1;
+ /// We need this because we cannot easily make OMPLoopTransformationDirective
+ /// a proper Stmt.
+ Stmt *S = nullptr;
+
protected:
void setNumGeneratedTopLevelLoops(unsigned N) {
NumGeneratedTopLevelLoops = N;
}
+ explicit OMPLoopTransformationDirective(Stmt *S) : S(S) {}
+
public:
unsigned getNumGeneratedTopLevelLoops() const {
return NumGeneratedTopLevelLoops;
}
+
+ /// Returns the specific directive related to this loop transformation.
+ Stmt *getDirective() const { return S; }
+
+ /// Get the de-sugared statements after the loop transformation.
+ ///
+ /// Might be nullptr if either the directive generates no loops and is handled
+ /// directly in CodeGen, or resolving a template-dependence context is
+ /// required.
+ Stmt *getTransformedStmt() const;
+
+ /// Return preinits statement.
+ Stmt *getPreInits() const;
+
+ static bool classof(const Stmt *T) {
+ return isa<OMPCanonicalLoopNestTransformationDirective,
+ OMPCanonicalLoopSequenceTransformationDirective>(T);
+ }
};
/// The base class for all transformation directives of canonical loop nests.
@@ -990,7 +1020,8 @@ protected:
explicit OMPCanonicalLoopNestTransformationDirective(
StmtClass SC, OpenMPDirectiveKind Kind, SourceLocation StartLoc,
SourceLocation EndLoc, unsigned NumAssociatedLoops)
- : OMPLoopBasedDirective(SC, Kind, StartLoc, EndLoc, NumAssociatedLoops) {}
+ : OMPLoopBasedDirective(SC, Kind, StartLoc, EndLoc, NumAssociatedLoops),
+ OMPLoopTransformationDirective(this) {}
public:
/// Return the number of associated (consumed) loops.
@@ -5928,6 +5959,112 @@ public:
}
};
+/// The base class for all transformation directives of canonical loop
+/// sequences (currently only 'fuse')
+class OMPCanonicalLoopSequenceTransformationDirective
+ : public OMPExecutableDirective,
+ public OMPLoopTransformationDirective {
+ friend class ASTStmtReader;
+
+protected:
+ explicit OMPCanonicalLoopSequenceTransformationDirective(
+ StmtClass SC, OpenMPDirectiveKind Kind, SourceLocation StartLoc,
+ SourceLocation EndLoc)
+ : OMPExecutableDirective(SC, Kind, StartLoc, EndLoc),
+ OMPLoopTransformationDirective(this) {}
+
+public:
+ /// Get the de-sugared statements after the loop transformation.
+ ///
+ /// Might be nullptr if either the directive generates no loops and is handled
+ /// directly in CodeGen, or resolving a template-dependence context is
+ /// required.
+ Stmt *getTransformedStmt() const;
+
+ /// Return preinits statement.
+ Stmt *getPreInits() const;
+
+ static bool classof(const Stmt *T) {
+ Stmt::StmtClass C = T->getStmtClass();
+ return C == OMPFuseDirectiveClass;
+ }
+};
+
+/// Represents the '#pragma omp fuse' loop transformation directive
+///
+/// \code{c}
+/// #pragma omp fuse
+/// {
+/// for(int i = 0; i < m1; ++i) {...}
+/// for(int j = 0; j < m2; ++j) {...}
+/// ...
+/// }
+/// \endcode
+class OMPFuseDirective final
+ : public OMPCanonicalLoopSequenceTransformationDirective {
+ friend class ASTStmtReader;
+ friend class OMPExecutableDirective;
+
+ // Offsets of child members.
+ enum {
+ PreInitsOffset = 0,
+ TransformedStmtOffset,
+ };
+
+ explicit OMPFuseDirective(SourceLocation StartLoc, SourceLocation EndLoc)
+ : OMPCanonicalLoopSequenceTransformationDirective(
+ OMPFuseDirectiveClass, llvm::omp::OMPD_fuse, StartLoc, EndLoc) {}
+
+ void setPreInits(Stmt *PreInits) {
+ Data->getChildren()[PreInitsOffset] = PreInits;
+ }
+
+ void setTransformedStmt(Stmt *S) {
+ Data->getChildren()[TransformedStmtOffset] = S;
+ }
+
+public:
+  /// Create a new AST node representation for '#pragma omp fuse'.
+  ///
+  /// \param C Context of the AST
+  /// \param StartLoc Location of the introducer (e.g. the 'omp' token)
+  /// \param EndLoc Location of the directive's end (e.g. the tok::eod)
+  /// \param Clauses The directive's clauses
+  /// \param NumGeneratedTopLevelLoops Number of top-level generated loops,
+  ///                                  typically 1, but the looprange clause
+  ///                                  can change this.
+  /// \param AssociatedStmt The outermost associated loop
+  /// \param TransformedStmt The loop nest after fusion, or nullptr in
+  ///                        dependent contexts
+  /// \param PreInits Helper preinits statements for the loop nest
+ static OMPFuseDirective *
+ Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+ ArrayRef<OMPClause *> Clauses, unsigned NumGeneratedTopLevelLoops,
+ Stmt *AssociatedStmt, Stmt *TransformedStmt, Stmt *PreInits);
+
+ /// Build an empty '#pragma omp fuse' AST node for deserialization
+ ///
+ /// \param C Context of the AST
+ /// \param NumClauses Number of clauses to allocate
+ static OMPFuseDirective *CreateEmpty(const ASTContext &C,
+ unsigned NumClauses);
+
+  /// Gets the associated loops after the transformation. This is the
+  /// de-sugared replacement, or nullptr in dependent contexts.
+ Stmt *getTransformedStmt() const {
+ return Data->getChildren()[TransformedStmtOffset];
+ }
+
+ /// Return preinits statement.
+ Stmt *getPreInits() const { return Data->getChildren()[PreInitsOffset]; }
+
+ static bool classof(const Stmt *T) {
+ return T->getStmtClass() == OMPFuseDirectiveClass;
+ }
+};
+
/// This represents '#pragma omp scan' directive.
///
/// \code
@@ -6596,4 +6733,37 @@ public:
} // end namespace clang
+namespace llvm {
+// Allow a Stmt* to be cast correctly to an OMPLoopTransformationDirective*.
+// The default routines would just use a C-style cast which won't work well
+// for the multiple inheritance here. We have to use a static cast from the
+// corresponding subclass.
+template <>
+struct CastInfo<clang::OMPLoopTransformationDirective, clang::Stmt *>
+ : public NullableValueCastFailed<clang::OMPLoopTransformationDirective *>,
+ public DefaultDoCastIfPossible<
+ clang::OMPLoopTransformationDirective *, clang::Stmt *,
+ CastInfo<clang::OMPLoopTransformationDirective, clang::Stmt *>> {
+ static bool isPossible(const clang::Stmt *T) {
+ return clang::OMPLoopTransformationDirective::classof(T);
+ }
+
+ static clang::OMPLoopTransformationDirective *doCast(clang::Stmt *T) {
+ if (auto *D =
+ dyn_cast<clang::OMPCanonicalLoopNestTransformationDirective>(T))
+ return static_cast<clang::OMPLoopTransformationDirective *>(D);
+ if (auto *D =
+ dyn_cast<clang::OMPCanonicalLoopSequenceTransformationDirective>(T))
+ return static_cast<clang::OMPLoopTransformationDirective *>(D);
+ llvm_unreachable("unexpected type");
+ }
+};
+template <>
+struct CastInfo<clang::OMPLoopTransformationDirective, const clang::Stmt *>
+ : public ConstStrippingForwardingCast<
+ clang::OMPLoopTransformationDirective, const clang::Stmt *,
+ CastInfo<clang::OMPLoopTransformationDirective, clang::Stmt *>> {};
+
+} // namespace llvm
+
#endif
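
These specializations let the usual LLVM casting utilities see through the multiple inheritance; a minimal sketch of the intended use, matching the doForAllLoops change later in this patch:

    // Works for both nest transformations (tile, unroll, reverse, interchange)
    // and sequence transformations (fuse).
    if (auto *TD = llvm::dyn_cast<clang::OMPLoopTransformationDirective>(CurStmt)) {
      if (clang::Stmt *Transformed = TD->getTransformedStmt())
        CurStmt = Transformed; // continue the walk on the de-sugared loops
    }
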
diff --git a/clang/include/clang/Basic/AMDGPUTypes.def b/clang/include/clang/Basic/AMDGPUTypes.def
index d3dff446..089a72b 100644
--- a/clang/include/clang/Basic/AMDGPUTypes.def
+++ b/clang/include/clang/Basic/AMDGPUTypes.def
@@ -21,6 +21,7 @@
#endif
AMDGPU_OPAQUE_PTR_TYPE("__amdgpu_buffer_rsrc_t", AMDGPUBufferRsrc, AMDGPUBufferRsrcTy, 128, 128, 8)
+AMDGPU_OPAQUE_PTR_TYPE("__amdgpu_texture_t", AMDGPUTexture, AMDGPUTextureTy, 256, 256, 0)
AMDGPU_NAMED_BARRIER_TYPE("__amdgpu_named_workgroup_barrier_t", AMDGPUNamedWorkgroupBarrier, AMDGPUNamedWorkgroupBarrierTy, 128, 32, 0)
diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
index 9aad00b..b856ad1 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -34,6 +34,7 @@
// Q -> target builtin type, followed by a character to distinguish the builtin type
// Qa -> AArch64 svcount_t builtin type.
// Qb -> AMDGPU __amdgpu_buffer_rsrc_t builtin type.
+// Qt -> AMDGPU __amdgpu_texture_t builtin type.
// E -> ext_vector, followed by the number of elements and the base type.
// X -> _Complex, followed by the base type.
// Y -> ptrdiff_t
diff --git a/clang/include/clang/Basic/Diagnostic.h b/clang/include/clang/Basic/Diagnostic.h
index af26a04..e540040 100644
--- a/clang/include/clang/Basic/Diagnostic.h
+++ b/clang/include/clang/Basic/Diagnostic.h
@@ -25,6 +25,7 @@
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Compiler.h"
#include <cassert>
@@ -1367,6 +1368,22 @@ inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &DB,
}
inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &DB,
+ const llvm::APSInt &Int) {
+ DB.AddString(toString(Int, /*Radix=*/10, Int.isSigned(),
+ /*formatAsCLiteral=*/false,
+ /*UpperCase=*/true, /*InsertSeparators=*/true));
+ return DB;
+}
+
+inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &DB,
+ const llvm::APInt &Int) {
+ DB.AddString(toString(Int, /*Radix=*/10, /*Signed=*/false,
+ /*formatAsCLiteral=*/false,
+ /*UpperCase=*/true, /*InsertSeparators=*/true));
+ return DB;
+}
+
+inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &DB,
int I) {
DB.AddTaggedVal(I, DiagnosticsEngine::ak_sint);
return DB;
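
A brief sketch of what the new overloads enable; the diagnostic ID here is hypothetical and S is assumed to be a Sema reference. The value is rendered in decimal with digit separators:

    llvm::APSInt Count(llvm::APInt(64, 1234567), /*isUnsigned=*/false);
    S.Diag(Loc, diag::err_example_limit_exceeded) << Count; // prints "1,234,567"
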
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index 4d9e123..c724136 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -1141,7 +1141,7 @@ def warn_cxx23_compat_binding_pack : Warning<
def err_capture_default_first : Error<
"capture default must be first">;
def ext_decl_attrs_on_lambda : ExtWarn<
- "%select{an attribute specifier sequence|%0}1 in this position "
+ "%select{an attribute specifier sequence|%1}0 in this position "
"is a C++23 extension">, InGroup<CXX23AttrsOnLambda>;
def ext_lambda_missing_parens : ExtWarn<
"lambda without a parameter clause is a C++23 extension">,
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index dc4c6d3..b157cbb 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -5770,8 +5770,10 @@ def err_template_recursion_depth_exceeded : Error<
def err_constraint_depends_on_self
: Error<"satisfaction of constraint %0 depends on itself">,
NoSFINAE;
-def note_template_recursion_depth : Note<
- "use -ftemplate-depth=N to increase recursive template instantiation depth">;
+def note_template_recursion_depth
+ : Note<"use -ftemplate-depth=N to increase recursive template "
+ "instantiation depth">,
+ NoSFINAE;
def err_template_instantiate_within_definition : Error<
"%select{implicit|explicit}0 instantiation of template %1 within its"
@@ -11761,6 +11763,18 @@ def note_omp_implicit_dsa : Note<
"implicitly determined as %0">;
def err_omp_loop_var_dsa : Error<
"loop iteration variable in the associated loop of 'omp %1' directive may not be %0, predetermined as %2">;
+def err_omp_not_a_loop_sequence
+ : Error<"statement after '#pragma omp %0' must be a loop sequence "
+ "containing canonical loops or loop-generating constructs">;
+def err_omp_empty_loop_sequence
+ : Error<"loop sequence after '#pragma omp %0' must contain at least 1 "
+ "canonical loop or loop-generating construct">;
+def err_omp_invalid_looprange
+ : Error<"looprange clause selects loops from %1 to %2 but this exceeds the "
+ "number of loops (%3) in the loop sequence">;
+def warn_omp_redundant_fusion : Warning<"looprange clause selects a single "
+ "loop, resulting in redundant fusion">,
+ InGroup<OpenMPClauses>;
def err_omp_not_for : Error<
"%select{statement after '#pragma omp %1' must be a for loop|"
"expected %2 for loops after '#pragma omp %1'%select{|, but found only %4}3}0">;
diff --git a/clang/include/clang/Basic/OpenMPKinds.h b/clang/include/clang/Basic/OpenMPKinds.h
index 4c988e0..ed89a31 100644
--- a/clang/include/clang/Basic/OpenMPKinds.h
+++ b/clang/include/clang/Basic/OpenMPKinds.h
@@ -391,6 +391,13 @@ bool isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind);
bool isOpenMPCanonicalLoopNestTransformationDirective(
OpenMPDirectiveKind DKind);
+/// Checks if the specified directive is a loop transformation directive that
+/// applies to a canonical loop sequence.
+/// \param DKind Specified directive.
+/// \return True iff the directive is a loop transformation.
+bool isOpenMPCanonicalLoopSequenceTransformationDirective(
+ OpenMPDirectiveKind DKind);
+
/// Checks if the specified directive is a loop transformation directive.
/// \param DKind Specified directive.
/// \return True iff the directive is a loop transformation.
diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td
index dd1a244..bf3686b 100644
--- a/clang/include/clang/Basic/StmtNodes.td
+++ b/clang/include/clang/Basic/StmtNodes.td
@@ -238,6 +238,10 @@ def OMPUnrollDirective : StmtNode<OMPCanonicalLoopNestTransformationDirective>;
def OMPReverseDirective : StmtNode<OMPCanonicalLoopNestTransformationDirective>;
def OMPInterchangeDirective
: StmtNode<OMPCanonicalLoopNestTransformationDirective>;
+def OMPCanonicalLoopSequenceTransformationDirective
+ : StmtNode<OMPExecutableDirective, 1>;
+def OMPFuseDirective
+ : StmtNode<OMPCanonicalLoopSequenceTransformationDirective>;
def OMPForDirective : StmtNode<OMPLoopDirective>;
def OMPForSimdDirective : StmtNode<OMPLoopDirective>;
def OMPSectionsDirective : StmtNode<OMPExecutableDirective>;
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index e5c5ada..ceb16174 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1259,6 +1259,10 @@ public:
ArrayRef<ConstraintInfo> OutputConstraints,
unsigned &Index) const;
+ std::string
+ simplifyConstraint(StringRef Constraint,
+ SmallVectorImpl<ConstraintInfo> *OutCons = nullptr) const;
+
// Constraint parm will be left pointing at the last character of
// the constraint. In practice, it won't be changed unless the
// constraint is longer than one character.
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index bb39444..e1be08c 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -683,8 +683,8 @@ def CIR_ConditionOp : CIR_Op<"condition", [
//===----------------------------------------------------------------------===//
defvar CIR_YieldableScopes = [
- "ArrayCtor", "ArrayDtor", "CaseOp", "DoWhileOp", "ForOp", "IfOp", "ScopeOp",
- "SwitchOp", "TernaryOp", "WhileOp"
+ "ArrayCtor", "ArrayDtor", "CaseOp", "DoWhileOp", "ForOp", "GlobalOp", "IfOp",
+ "ScopeOp", "SwitchOp", "TernaryOp", "WhileOp"
];
def CIR_YieldOp : CIR_Op<"yield", [
@@ -1776,7 +1776,9 @@ def CIR_GlobalLinkageKind : CIR_I32EnumAttr<
// is upstreamed.
def CIR_GlobalOp : CIR_Op<"global", [
- DeclareOpInterfaceMethods<CIRGlobalValueInterface>
+ DeclareOpInterfaceMethods<RegionBranchOpInterface>,
+ DeclareOpInterfaceMethods<CIRGlobalValueInterface>,
+ NoRegionArguments
]> {
let summary = "Declare or define a global variable";
let description = [{
@@ -1807,6 +1809,9 @@ def CIR_GlobalOp : CIR_Op<"global", [
UnitAttr:$dso_local,
OptionalAttr<I64Attr>:$alignment);
+ let regions = (region MaxSizedRegion<1>:$ctorRegion,
+ MaxSizedRegion<1>:$dtorRegion);
+
let assemblyFormat = [{
($sym_visibility^)?
(`` $global_visibility^)?
@@ -1815,24 +1820,34 @@ def CIR_GlobalOp : CIR_Op<"global", [
(`comdat` $comdat^)?
(`dso_local` $dso_local^)?
$sym_name
- custom<GlobalOpTypeAndInitialValue>($sym_type, $initial_value)
+ custom<GlobalOpTypeAndInitialValue>($sym_type, $initial_value,
+ $ctorRegion, $dtorRegion)
attr-dict
}];
let extraClassDeclaration = [{
- bool isDeclaration() { return !getInitialValue(); }
+ bool isDeclaration() {
+ return !getInitialValue() && getCtorRegion().empty() && getDtorRegion().empty();
+ }
bool hasInitializer() { return !isDeclaration(); }
}];
let skipDefaultBuilders = 1;
- let builders = [OpBuilder<(ins
- "llvm::StringRef":$sym_name,
- "mlir::Type":$sym_type,
- CArg<"bool", "false">:$isConstant,
- // CIR defaults to external linkage.
- CArg<"cir::GlobalLinkageKind",
- "cir::GlobalLinkageKind::ExternalLinkage">:$linkage)>];
+ let builders = [
+ OpBuilder<(ins
+ "llvm::StringRef":$sym_name,
+ "mlir::Type":$sym_type,
+ CArg<"bool", "false">:$isConstant,
+ // CIR defaults to external linkage.
+ CArg<"cir::GlobalLinkageKind",
+ "cir::GlobalLinkageKind::ExternalLinkage">:$linkage,
+ CArg<"llvm::function_ref<void(mlir::OpBuilder &, mlir::Location)>",
+ "nullptr">:$ctorBuilder,
+ CArg<"llvm::function_ref<void(mlir::OpBuilder &, mlir::Location)>",
+ "nullptr">:$dtorBuilder)
+ >
+ ];
let hasVerifier = 1;
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index 0fac1b2..7e59989 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -208,6 +208,7 @@ struct MissingFeatures {
static bool dataLayoutTypeAllocSize() { return false; }
static bool dataLayoutTypeStoreSize() { return false; }
static bool deferredCXXGlobalInit() { return false; }
+ static bool deleteArray() { return false; }
static bool devirtualizeMemberFunction() { return false; }
static bool ehCleanupFlags() { return false; }
static bool ehCleanupScope() { return false; }
@@ -219,6 +220,7 @@ struct MissingFeatures {
static bool emitCondLikelihoodViaExpectIntrinsic() { return false; }
static bool emitLifetimeMarkers() { return false; }
static bool emitLValueAlignmentAssumption() { return false; }
+ static bool emitNullCheckForDeleteCalls() { return false; }
static bool emitNullabilityCheck() { return false; }
static bool emitTypeCheck() { return false; }
static bool emitTypeMetadataCodeForVCall() { return false; }
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index 30edd30..e301cf1 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -6767,6 +6767,9 @@ private:
OpenMPClauseKind Kind,
bool ParseOnly);
+ /// Parses the 'looprange' clause of a '#pragma omp fuse' directive.
+ OMPClause *ParseOpenMPLoopRangeClause();
+
/// Parses the 'sizes' clause of a '#pragma omp tile' directive.
OMPClause *ParseOpenMPSizesClause();
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 2bd6be2..f53aafd 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -13335,8 +13335,6 @@ public:
Sema &SemaRef;
bool Invalid;
bool AlreadyInstantiating;
- bool CheckInstantiationDepth(SourceLocation PointOfInstantiation,
- SourceRange InstantiationRange);
InstantiatingTemplate(Sema &SemaRef,
CodeSynthesisContext::SynthesisKind Kind,
@@ -13529,7 +13527,7 @@ public:
~ArgPackSubstIndexRAII() { Self.ArgPackSubstIndex = OldSubstIndex; }
};
- void pushCodeSynthesisContext(CodeSynthesisContext Ctx);
+ bool pushCodeSynthesisContext(CodeSynthesisContext Ctx);
void popCodeSynthesisContext();
void PrintContextStack(InstantiationContextDiagFuncRef DiagFunc) {
diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h
index c0fd7a6..daf58b1 100644
--- a/clang/include/clang/Sema/SemaOpenMP.h
+++ b/clang/include/clang/Sema/SemaOpenMP.h
@@ -463,6 +463,13 @@ public:
Stmt *AStmt,
SourceLocation StartLoc,
SourceLocation EndLoc);
+
+ /// Called on well-formed '#pragma omp fuse' after parsing of its
+ /// clauses and the associated statement.
+ StmtResult ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
+ Stmt *AStmt, SourceLocation StartLoc,
+ SourceLocation EndLoc);
+
/// Called on well-formed '\#pragma omp for' after parsing
/// of the associated statement.
StmtResult
@@ -921,6 +928,12 @@ public:
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc);
+
+  /// Called on a well-formed 'looprange' clause after parsing its arguments.
+ OMPClause *
+ ActOnOpenMPLoopRangeClause(Expr *First, Expr *Count, SourceLocation StartLoc,
+ SourceLocation LParenLoc, SourceLocation FirstLoc,
+ SourceLocation CountLoc, SourceLocation EndLoc);
/// Called on well-formed 'ordered' clause.
OMPClause *
ActOnOpenMPOrderedClause(SourceLocation StartLoc, SourceLocation EndLoc,
@@ -1485,7 +1498,81 @@ private:
bool checkTransformableLoopNest(
OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops,
SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
- Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits);
+ Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *>> &OriginalInits);
+
+ /// Holds the result of the analysis of a (possibly canonical) loop.
+ struct LoopAnalysis {
+ /// The analyzed loop or loop transformation.
+ Stmt *AStmt = nullptr;
+ /// Loop analyses results.
+ OMPLoopBasedDirective::HelperExprs HelperExprs;
+ /// The for-statement of the loop. TheForStmt equals AStmt only when the
+ /// latter is a canonical loop (i.e. not a loop transformation).
+ Stmt *TheForStmt = nullptr;
+ /// Initialization statements before transformations.
+ SmallVector<Stmt *> OriginalInits;
+ /// Initialization statements required after transformation of this loop.
+ SmallVector<Stmt *> TransformsPreInits;
+
+ explicit LoopAnalysis(Stmt *S) : AStmt(S) {}
+
+ bool isRegularLoop() const { return isRegularLoop(AStmt); }
+ bool isLoopTransformation() const { return isLoopTransformation(AStmt); }
+
+ // Convenience functions used when building LoopSequenceAnalysis.
+ static bool isRegularLoop(Stmt *S) {
+ return isa<ForStmt, CXXForRangeStmt>(S);
+ }
+ static bool isLoopTransformation(Stmt *S) {
+ return isa<OMPLoopTransformationDirective>(S);
+ }
+ };
+
+ /// Holds the result of the analysis of a (possibly canonical) loop sequence.
+ struct LoopSequenceAnalysis {
+ /// Number of top level canonical loops.
+ unsigned LoopSeqSize = 0;
+    /// Analysis results for each loop in the sequence.
+ SmallVector<LoopAnalysis, 2> Loops;
+ /// Additional code required before entering the transformed loop sequence.
+ SmallVector<Stmt *> LoopSequencePreInits;
+
+ // Convenience function used when building the LoopSequenceAnalysis.
+ static bool isLoopSequenceDerivation(Stmt *S) {
+ return LoopAnalysis::isRegularLoop(S) ||
+ LoopAnalysis::isLoopTransformation(S);
+ }
+ };
+
+  /// The main recursive routine of `checkTransformableLoopSequence`. It walks
+  /// a canonical loop sequence and extracts key information, such as the
+  /// number of top-level loops, loop statements, helper expressions, and
+  /// other relevant loop-related data, in a single traversal to avoid
+  /// redundant passes. The analysis also flattens inner loop sequences.
+  ///
+  /// \param LoopSeqStmt The AST of the original statement.
+  /// \param SeqAnalysis [out] Result of the analysis of \p LoopSeqStmt
+  /// \param Context The AST context
+  /// \param Kind The loop transformation directive kind.
+  /// \return Whether the original statement is both syntactically and
+  /// semantically correct according to the OpenMP 6.0 canonical loop
+  /// sequence definition.
+ bool analyzeLoopSequence(Stmt *LoopSeqStmt, LoopSequenceAnalysis &SeqAnalysis,
+ ASTContext &Context, OpenMPDirectiveKind Kind);
+
+  /// Validates whether a loop sequence can be transformed according to the
+  /// given directive. This driver performs the necessary setup and
+  /// initialization before recursing via `analyzeLoopSequence`.
+  ///
+  /// \param Kind The loop transformation directive kind.
+  /// \param AStmt The AST of the original statement
+  /// \param SeqAnalysis [out] Result of the analysis of \p AStmt
+  /// \param Context The AST context
+  /// \return Whether the loop sequence was analyzed without errors
+ bool checkTransformableLoopSequence(OpenMPDirectiveKind Kind, Stmt *AStmt,
+ LoopSequenceAnalysis &SeqAnalysis,
+ ASTContext &Context);
/// Helper to keep information about the current `omp begin/end declare
/// variant` nesting.
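
A sketch of the kind of input analyzeLoopSequence has to handle: a member of the canonical loop sequence may itself be a loop transformation (a loop-generating construct), which LoopAnalysis::isLoopTransformation recognizes and the analysis flattens. Here n and work() are placeholders.

    #pragma omp fuse
    {
      for (int i = 0; i < n; ++i)   // regular canonical loop
        work(i);
      #pragma omp unroll partial(4) // loop-generating construct: still counts
      for (int j = 0; j < n; ++j)   // as one member of the loop sequence
        work(j);
    }
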
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 441047d..99864c7 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -1951,6 +1951,7 @@ enum StmtCode {
STMT_OMP_UNROLL_DIRECTIVE,
STMT_OMP_REVERSE_DIRECTIVE,
STMT_OMP_INTERCHANGE_DIRECTIVE,
+ STMT_OMP_FUSE_DIRECTIVE,
STMT_OMP_FOR_DIRECTIVE,
STMT_OMP_FOR_SIMD_DIRECTIVE,
STMT_OMP_SECTIONS_DIRECTIVE,
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 61dd330..0fd0e7e 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -12590,6 +12590,10 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
Type = Context.AMDGPUBufferRsrcTy;
break;
}
+ case 't': {
+ Type = Context.AMDGPUTextureTy;
+ break;
+ }
default:
llvm_unreachable("Unexpected target builtin type");
}
diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp
index b4da999..0b7b6cd 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -2934,8 +2934,9 @@ bool Compiler<Emitter>::VisitMaterializeTemporaryExpr(
// For everyhing else, use local variables.
if (SubExprT) {
bool IsConst = SubExpr->getType().isConstQualified();
- unsigned LocalIndex =
- allocateLocalPrimitive(E, *SubExprT, IsConst, E->getExtendingDecl());
+ bool IsVolatile = SubExpr->getType().isVolatileQualified();
+ unsigned LocalIndex = allocateLocalPrimitive(
+ E, *SubExprT, IsConst, IsVolatile, E->getExtendingDecl());
if (!this->visit(SubExpr))
return false;
if (!this->emitSetLocal(*SubExprT, LocalIndex, E))
@@ -4452,6 +4453,9 @@ bool Compiler<Emitter>::visitAssignment(const Expr *LHS, const Expr *RHS,
if (!this->visit(LHS))
return false;
+ if (LHS->getType().isVolatileQualified())
+ return this->emitInvalidStore(LHS->getType().getTypePtr(), E);
+
// We don't support assignments in C.
if (!Ctx.getLangOpts().CPlusPlus && !this->emitInvalid(E))
return false;
@@ -4560,13 +4564,14 @@ bool Compiler<Emitter>::emitConst(const APSInt &Value, const Expr *E) {
template <class Emitter>
unsigned Compiler<Emitter>::allocateLocalPrimitive(
- DeclTy &&Src, PrimType Ty, bool IsConst, const ValueDecl *ExtendingDecl,
- ScopeKind SC, bool IsConstexprUnknown) {
+ DeclTy &&Src, PrimType Ty, bool IsConst, bool IsVolatile,
+ const ValueDecl *ExtendingDecl, ScopeKind SC, bool IsConstexprUnknown) {
// FIXME: There are cases where Src.is<Expr*>() is wrong, e.g.
// (int){12} in C. Consider using Expr::isTemporaryObject() instead
// or isa<MaterializeTemporaryExpr>().
Descriptor *D = P.createDescriptor(Src, Ty, nullptr, Descriptor::InlineDescMD,
- IsConst, isa<const Expr *>(Src));
+ IsConst, isa<const Expr *>(Src),
+ /*IsMutable=*/false, IsVolatile);
D->IsConstexprUnknown = IsConstexprUnknown;
Scope::Local Local = this->createLocal(D);
if (auto *VD = dyn_cast_if_present<ValueDecl>(Src.dyn_cast<const Decl *>()))
@@ -4874,7 +4879,8 @@ Compiler<Emitter>::visitVarDecl(const VarDecl *VD, const Expr *Init,
if (VarT) {
unsigned Offset = this->allocateLocalPrimitive(
- VD, *VarT, VD->getType().isConstQualified(), nullptr, ScopeKind::Block,
+ VD, *VarT, VD->getType().isConstQualified(),
+ VD->getType().isVolatileQualified(), nullptr, ScopeKind::Block,
IsConstexprUnknown);
if (Init) {
// If this is a toplevel declaration, create a scope for the
diff --git a/clang/lib/AST/ByteCode/Compiler.h b/clang/lib/AST/ByteCode/Compiler.h
index 09599b3..5c46f75 100644
--- a/clang/lib/AST/ByteCode/Compiler.h
+++ b/clang/lib/AST/ByteCode/Compiler.h
@@ -327,6 +327,7 @@ protected:
/// Creates a local primitive value.
unsigned allocateLocalPrimitive(DeclTy &&Decl, PrimType Ty, bool IsConst,
+ bool IsVolatile = false,
const ValueDecl *ExtendingDecl = nullptr,
ScopeKind SC = ScopeKind::Block,
bool IsConstexprUnknown = false);
diff --git a/clang/lib/AST/ByteCode/Context.cpp b/clang/lib/AST/ByteCode/Context.cpp
index 306f95c..683e916 100644
--- a/clang/lib/AST/ByteCode/Context.cpp
+++ b/clang/lib/AST/ByteCode/Context.cpp
@@ -567,9 +567,15 @@ const Function *Context::getOrCreateFunction(const FunctionDecl *FuncDecl) {
// Assign descriptors to all parameters.
// Composite objects are lowered to pointers.
for (const ParmVarDecl *PD : FuncDecl->parameters()) {
+ bool IsConst = PD->getType().isConstQualified();
+ bool IsVolatile = PD->getType().isVolatileQualified();
+
OptPrimType T = classify(PD->getType());
PrimType PT = T.value_or(PT_Ptr);
- Descriptor *Desc = P->createDescriptor(PD, PT);
+ Descriptor *Desc = P->createDescriptor(PD, PT, nullptr, std::nullopt,
+ IsConst, /*IsTemporary=*/false,
+ /*IsMutable=*/false, IsVolatile);
+
ParamDescriptors.insert({ParamOffset, {PT, Desc}});
ParamOffsets.push_back(ParamOffset);
ParamOffset += align(primSize(PT));
@@ -595,9 +601,14 @@ const Function *Context::getOrCreateObjCBlock(const BlockExpr *E) {
// Assign descriptors to all parameters.
// Composite objects are lowered to pointers.
for (const ParmVarDecl *PD : BD->parameters()) {
+ bool IsConst = PD->getType().isConstQualified();
+ bool IsVolatile = PD->getType().isVolatileQualified();
+
OptPrimType T = classify(PD->getType());
PrimType PT = T.value_or(PT_Ptr);
- Descriptor *Desc = P->createDescriptor(PD, PT);
+ Descriptor *Desc = P->createDescriptor(PD, PT, nullptr, std::nullopt,
+ IsConst, /*IsTemporary=*/false,
+ /*IsMutable=*/false, IsVolatile);
ParamDescriptors.insert({ParamOffset, {PT, Desc}});
ParamOffsets.push_back(ParamOffset);
ParamOffset += align(primSize(PT));
diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp
index 8aaefc7..21af3d6 100644
--- a/clang/lib/AST/ByteCode/Interp.cpp
+++ b/clang/lib/AST/ByteCode/Interp.cpp
@@ -889,6 +889,8 @@ bool CheckStore(InterpState &S, CodePtr OpPC, const Pointer &Ptr) {
return false;
if (!CheckConst(S, OpPC, Ptr))
return false;
+ if (!CheckVolatile(S, OpPC, Ptr, AK_Assign))
+ return false;
if (!S.inConstantContext() && isConstexprUnknown(Ptr))
return false;
return true;
diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h
index 7867a06..bb0c458 100644
--- a/clang/lib/AST/ByteCode/Interp.h
+++ b/clang/lib/AST/ByteCode/Interp.h
@@ -1730,9 +1730,8 @@ inline bool GetPtrLocal(InterpState &S, CodePtr OpPC, uint32_t I) {
}
inline bool GetPtrParam(InterpState &S, CodePtr OpPC, uint32_t I) {
- if (S.checkingPotentialConstantExpression()) {
+ if (S.Current->isBottomFrame())
return false;
- }
S.Stk.push<Pointer>(S.Current->getParamPointer(I));
return true;
}
@@ -3344,6 +3343,18 @@ inline bool InvalidCast(InterpState &S, CodePtr OpPC, CastKind Kind,
return false;
}
+inline bool InvalidStore(InterpState &S, CodePtr OpPC, const Type *T) {
+ if (S.getLangOpts().CPlusPlus) {
+ QualType VolatileType = QualType(T, 0).withVolatile();
+ S.FFDiag(S.Current->getSource(OpPC),
+ diag::note_constexpr_access_volatile_type)
+ << AK_Assign << VolatileType;
+ } else {
+ S.FFDiag(S.Current->getSource(OpPC));
+ }
+ return false;
+}
+
inline bool InvalidDeclRef(InterpState &S, CodePtr OpPC, const DeclRefExpr *DR,
bool InitializerFailed) {
assert(DR);
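
A minimal sketch (not taken from the patch's tests) of what the new InvalidStore opcode reports: in C++ the note names the volatile-qualified type of the assignment target, while in C the evaluation is rejected with the generic non-constant diagnostic.

    constexpr int f() {
      volatile int v = 0;
      v = 1;      // assignment to volatile 'volatile int' is not a constant operation
      return 0;
    }
    // constexpr int x = f(); // would be rejected during constant evaluation
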
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 891344d..a2e97fc 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -1294,95 +1294,6 @@ static bool interp__builtin_assume_aligned(InterpState &S, CodePtr OpPC,
return true;
}
-static bool interp__builtin_ia32_bextr(InterpState &S, CodePtr OpPC,
- const InterpFrame *Frame,
- const CallExpr *Call) {
- if (Call->getNumArgs() != 2 || !Call->getArg(0)->getType()->isIntegerType() ||
- !Call->getArg(1)->getType()->isIntegerType())
- return false;
-
- APSInt Index = popToAPSInt(S, Call->getArg(1));
- APSInt Val = popToAPSInt(S, Call->getArg(0));
-
- unsigned BitWidth = Val.getBitWidth();
- uint64_t Shift = Index.extractBitsAsZExtValue(8, 0);
- uint64_t Length = Index.extractBitsAsZExtValue(8, 8);
- Length = Length > BitWidth ? BitWidth : Length;
-
- // Handle out of bounds cases.
- if (Length == 0 || Shift >= BitWidth) {
- pushInteger(S, 0, Call->getType());
- return true;
- }
-
- uint64_t Result = Val.getZExtValue() >> Shift;
- Result &= llvm::maskTrailingOnes<uint64_t>(Length);
- pushInteger(S, Result, Call->getType());
- return true;
-}
-
-static bool interp__builtin_ia32_bzhi(InterpState &S, CodePtr OpPC,
- const InterpFrame *Frame,
- const CallExpr *Call) {
- QualType CallType = Call->getType();
- if (Call->getNumArgs() != 2 || !Call->getArg(0)->getType()->isIntegerType() ||
- !Call->getArg(1)->getType()->isIntegerType() ||
- !CallType->isIntegerType())
- return false;
-
- APSInt Idx = popToAPSInt(S, Call->getArg(1));
- APSInt Val = popToAPSInt(S, Call->getArg(0));
-
- unsigned BitWidth = Val.getBitWidth();
- uint64_t Index = Idx.extractBitsAsZExtValue(8, 0);
-
- if (Index < BitWidth)
- Val.clearHighBits(BitWidth - Index);
-
- pushInteger(S, Val, CallType);
- return true;
-}
-
-static bool interp__builtin_ia32_pdep(InterpState &S, CodePtr OpPC,
- const InterpFrame *Frame,
- const CallExpr *Call) {
- if (Call->getNumArgs() != 2 || !Call->getArg(0)->getType()->isIntegerType() ||
- !Call->getArg(1)->getType()->isIntegerType())
- return false;
-
- APSInt Mask = popToAPSInt(S, Call->getArg(1));
- APSInt Val = popToAPSInt(S, Call->getArg(0));
-
- unsigned BitWidth = Val.getBitWidth();
- APInt Result = APInt::getZero(BitWidth);
- for (unsigned I = 0, P = 0; I != BitWidth; ++I) {
- if (Mask[I])
- Result.setBitVal(I, Val[P++]);
- }
- pushInteger(S, std::move(Result), Call->getType());
- return true;
-}
-
-static bool interp__builtin_ia32_pext(InterpState &S, CodePtr OpPC,
- const InterpFrame *Frame,
- const CallExpr *Call) {
- if (Call->getNumArgs() != 2 || !Call->getArg(0)->getType()->isIntegerType() ||
- !Call->getArg(1)->getType()->isIntegerType())
- return false;
-
- APSInt Mask = popToAPSInt(S, Call->getArg(1));
- APSInt Val = popToAPSInt(S, Call->getArg(0));
-
- unsigned BitWidth = Val.getBitWidth();
- APInt Result = APInt::getZero(BitWidth);
- for (unsigned I = 0, P = 0; I != BitWidth; ++I) {
- if (Mask[I])
- Result.setBitVal(P++, Val[I]);
- }
- pushInteger(S, std::move(Result), Call->getType());
- return true;
-}
-
/// (CarryIn, LHS, RHS, Result)
static bool interp__builtin_ia32_addcarry_subborrow(InterpState &S,
CodePtr OpPC,
@@ -3275,11 +3186,37 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case clang::X86::BI__builtin_ia32_bextr_u64:
case clang::X86::BI__builtin_ia32_bextri_u32:
case clang::X86::BI__builtin_ia32_bextri_u64:
- return interp__builtin_ia32_bextr(S, OpPC, Frame, Call);
+ return interp__builtin_elementwise_int_binop(
+ S, OpPC, Call, [](const APSInt &Val, const APSInt &Idx) {
+ unsigned BitWidth = Val.getBitWidth();
+ uint64_t Shift = Idx.extractBitsAsZExtValue(8, 0);
+ uint64_t Length = Idx.extractBitsAsZExtValue(8, 8);
+ if (Length > BitWidth) {
+ Length = BitWidth;
+ }
+
+ // Handle out of bounds cases.
+ if (Length == 0 || Shift >= BitWidth)
+ return APInt(BitWidth, 0);
+
+ uint64_t Result = Val.getZExtValue() >> Shift;
+ Result &= llvm::maskTrailingOnes<uint64_t>(Length);
+ return APInt(BitWidth, Result);
+ });
case clang::X86::BI__builtin_ia32_bzhi_si:
case clang::X86::BI__builtin_ia32_bzhi_di:
- return interp__builtin_ia32_bzhi(S, OpPC, Frame, Call);
+ return interp__builtin_elementwise_int_binop(
+ S, OpPC, Call, [](const APSInt &Val, const APSInt &Idx) {
+ unsigned BitWidth = Val.getBitWidth();
+ uint64_t Index = Idx.extractBitsAsZExtValue(8, 0);
+ APSInt Result = Val;
+
+ if (Index < BitWidth)
+ Result.clearHighBits(BitWidth - Index);
+
+ return Result;
+ });
case clang::X86::BI__builtin_ia32_lzcnt_u16:
case clang::X86::BI__builtin_ia32_lzcnt_u32:
@@ -3299,11 +3236,33 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case clang::X86::BI__builtin_ia32_pdep_si:
case clang::X86::BI__builtin_ia32_pdep_di:
- return interp__builtin_ia32_pdep(S, OpPC, Frame, Call);
+ return interp__builtin_elementwise_int_binop(
+ S, OpPC, Call, [](const APSInt &Val, const APSInt &Mask) {
+ unsigned BitWidth = Val.getBitWidth();
+ APInt Result = APInt::getZero(BitWidth);
+
+ for (unsigned I = 0, P = 0; I != BitWidth; ++I) {
+ if (Mask[I])
+ Result.setBitVal(I, Val[P++]);
+ }
+
+ return Result;
+ });
case clang::X86::BI__builtin_ia32_pext_si:
case clang::X86::BI__builtin_ia32_pext_di:
- return interp__builtin_ia32_pext(S, OpPC, Frame, Call);
+ return interp__builtin_elementwise_int_binop(
+ S, OpPC, Call, [](const APSInt &Val, const APSInt &Mask) {
+ unsigned BitWidth = Val.getBitWidth();
+ APInt Result = APInt::getZero(BitWidth);
+
+ for (unsigned I = 0, P = 0; I != BitWidth; ++I) {
+ if (Mask[I])
+ Result.setBitVal(P++, Val[I]);
+ }
+
+ return Result;
+ });
case clang::X86::BI__builtin_ia32_addcarryx_u32:
case clang::X86::BI__builtin_ia32_addcarryx_u64:
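
The refactoring routes these builtins through the generic element-wise integer binop helper; the extraction logic itself is unchanged. A worked sketch of the bextr control-word encoding handled above (start in bits 0-7, length in bits 8-15), assuming the builtin is accepted in constant expressions as the interpreter support implies:

    // start = 4, length = 8: extract bits [4, 12) of 0x12345678 -> 0x67
    static_assert(__builtin_ia32_bextr_u32(0x12345678, 0x0804) == 0x67);
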
diff --git a/clang/lib/AST/ByteCode/InterpFrame.cpp b/clang/lib/AST/ByteCode/InterpFrame.cpp
index a3db0d7..039acb5 100644
--- a/clang/lib/AST/ByteCode/InterpFrame.cpp
+++ b/clang/lib/AST/ByteCode/InterpFrame.cpp
@@ -231,6 +231,8 @@ Pointer InterpFrame::getParamPointer(unsigned Off) {
if (auto Pt = Params.find(Off); Pt != Params.end())
return Pointer(reinterpret_cast<Block *>(Pt->second.get()));
+ assert(!isBottomFrame());
+
// Allocate memory to store the parameter and the block metadata.
const auto &Desc = Func->getParamDescriptor(Off);
size_t BlockSize = sizeof(Block) + Desc.second->getAllocSize();
diff --git a/clang/lib/AST/ByteCode/Opcodes.td b/clang/lib/AST/ByteCode/Opcodes.td
index 7af2df5..532c444 100644
--- a/clang/lib/AST/ByteCode/Opcodes.td
+++ b/clang/lib/AST/ByteCode/Opcodes.td
@@ -797,6 +797,7 @@ def SideEffect : Opcode {}
def InvalidCast : Opcode {
let Args = [ArgCastKind, ArgBool];
}
+def InvalidStore : Opcode { let Args = [ArgTypePtr]; }
def CheckPseudoDtor : Opcode {}
def InvalidDeclRef : Opcode {
diff --git a/clang/lib/AST/ByteCode/Pointer.h b/clang/lib/AST/ByteCode/Pointer.h
index af89b66..cd738ce 100644
--- a/clang/lib/AST/ByteCode/Pointer.h
+++ b/clang/lib/AST/ByteCode/Pointer.h
@@ -262,6 +262,7 @@ public:
case Storage::Typeid:
return false;
}
+ llvm_unreachable("Unknown clang::interp::Storage enum");
}
/// Checks if the pointer is live.
bool isLive() const {
diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index 55b93e1..2ce4419 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -1024,6 +1024,26 @@ OMPPartialClause *OMPPartialClause::CreateEmpty(const ASTContext &C) {
return new (C) OMPPartialClause();
}
+OMPLoopRangeClause *
+OMPLoopRangeClause::Create(const ASTContext &C, SourceLocation StartLoc,
+ SourceLocation LParenLoc, SourceLocation FirstLoc,
+ SourceLocation CountLoc, SourceLocation EndLoc,
+ Expr *First, Expr *Count) {
+ OMPLoopRangeClause *Clause = CreateEmpty(C);
+ Clause->setLocStart(StartLoc);
+ Clause->setLParenLoc(LParenLoc);
+ Clause->setFirstLoc(FirstLoc);
+ Clause->setCountLoc(CountLoc);
+ Clause->setLocEnd(EndLoc);
+ Clause->setFirst(First);
+ Clause->setCount(Count);
+ return Clause;
+}
+
+OMPLoopRangeClause *OMPLoopRangeClause::CreateEmpty(const ASTContext &C) {
+ return new (C) OMPLoopRangeClause();
+}
+
OMPAllocateClause *OMPAllocateClause::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
Expr *Allocator, Expr *Alignment, SourceLocation ColonLoc,
@@ -1964,6 +1984,21 @@ void OMPClausePrinter::VisitOMPPartialClause(OMPPartialClause *Node) {
}
}
+void OMPClausePrinter::VisitOMPLoopRangeClause(OMPLoopRangeClause *Node) {
+ OS << "looprange";
+
+ Expr *First = Node->getFirst();
+ Expr *Count = Node->getCount();
+
+ if (First && Count) {
+ OS << "(";
+ First->printPretty(OS, nullptr, Policy, 0);
+ OS << ",";
+ Count->printPretty(OS, nullptr, Policy, 0);
+ OS << ")";
+ }
+}
+
void OMPClausePrinter::VisitOMPAllocatorClause(OMPAllocatorClause *Node) {
OS << "allocator(";
Node->getAllocator()->printPretty(OS, nullptr, Policy, 0);
diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp
index 1f6586f..a5b0cd3 100644
--- a/clang/lib/AST/StmtOpenMP.cpp
+++ b/clang/lib/AST/StmtOpenMP.cpp
@@ -125,13 +125,12 @@ OMPLoopBasedDirective::tryToFindNextInnerLoop(Stmt *CurStmt,
bool OMPLoopBasedDirective::doForAllLoops(
Stmt *CurStmt, bool TryImperfectlyNestedLoops, unsigned NumLoops,
llvm::function_ref<bool(unsigned, Stmt *)> Callback,
- llvm::function_ref<void(OMPCanonicalLoopNestTransformationDirective *)>
+ llvm::function_ref<void(OMPLoopTransformationDirective *)>
OnTransformationCallback) {
CurStmt = CurStmt->IgnoreContainers();
for (unsigned Cnt = 0; Cnt < NumLoops; ++Cnt) {
while (true) {
- auto *Dir =
- dyn_cast<OMPCanonicalLoopNestTransformationDirective>(CurStmt);
+ auto *Dir = dyn_cast<OMPLoopTransformationDirective>(CurStmt);
if (!Dir)
break;
@@ -371,6 +370,22 @@ OMPForDirective *OMPForDirective::Create(
return Dir;
}
+Stmt *OMPLoopTransformationDirective::getTransformedStmt() const {
+ if (auto *D = dyn_cast<OMPCanonicalLoopNestTransformationDirective>(S))
+ return D->getTransformedStmt();
+ if (auto *D = dyn_cast<OMPCanonicalLoopSequenceTransformationDirective>(S))
+ return D->getTransformedStmt();
+ llvm_unreachable("unexpected object type");
+}
+
+Stmt *OMPLoopTransformationDirective::getPreInits() const {
+ if (auto *D = dyn_cast<OMPCanonicalLoopNestTransformationDirective>(S))
+ return D->getPreInits();
+ if (auto *D = dyn_cast<OMPCanonicalLoopSequenceTransformationDirective>(S))
+ return D->getPreInits();
+ llvm_unreachable("unexpected object type");
+}
+
Stmt *OMPCanonicalLoopNestTransformationDirective::getTransformedStmt() const {
switch (getStmtClass()) {
#define STMT(CLASS, PARENT)
@@ -380,7 +395,7 @@ Stmt *OMPCanonicalLoopNestTransformationDirective::getTransformedStmt() const {
return static_cast<const CLASS *>(this)->getTransformedStmt();
#include "clang/AST/StmtNodes.inc"
default:
- llvm_unreachable("Not a loop transformation");
+ llvm_unreachable("Not a loop transformation for canonical loop nests");
}
}
@@ -393,7 +408,34 @@ Stmt *OMPCanonicalLoopNestTransformationDirective::getPreInits() const {
return static_cast<const CLASS *>(this)->getPreInits();
#include "clang/AST/StmtNodes.inc"
default:
- llvm_unreachable("Not a loop transformation");
+ llvm_unreachable("Not a loop transformation for canonical loop nests");
+ }
+}
+
+Stmt *
+OMPCanonicalLoopSequenceTransformationDirective::getTransformedStmt() const {
+ switch (getStmtClass()) {
+#define STMT(CLASS, PARENT)
+#define ABSTRACT_STMT(CLASS)
+#define OMPCANONICALLOOPSEQUENCETRANSFORMATIONDIRECTIVE(CLASS, PARENT) \
+ case Stmt::CLASS##Class: \
+ return static_cast<const CLASS *>(this)->getTransformedStmt();
+#include "clang/AST/StmtNodes.inc"
+ default:
+ llvm_unreachable("Not a loop transformation for canonical loop sequences");
+ }
+}
+
+Stmt *OMPCanonicalLoopSequenceTransformationDirective::getPreInits() const {
+ switch (getStmtClass()) {
+#define STMT(CLASS, PARENT)
+#define ABSTRACT_STMT(CLASS)
+#define OMPCANONICALLOOPSEQUENCETRANSFORMATIONDIRECTIVE(CLASS, PARENT) \
+ case Stmt::CLASS##Class: \
+ return static_cast<const CLASS *>(this)->getPreInits();
+#include "clang/AST/StmtNodes.inc"
+ default:
+ llvm_unreachable("Not a loop transformation for canonical loop sequences");
}
}
@@ -510,6 +552,27 @@ OMPInterchangeDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses,
SourceLocation(), SourceLocation(), NumLoops);
}
+OMPFuseDirective *OMPFuseDirective::Create(
+ const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+ ArrayRef<OMPClause *> Clauses, unsigned NumGeneratedTopLevelLoops,
+ Stmt *AssociatedStmt, Stmt *TransformedStmt, Stmt *PreInits) {
+
+ OMPFuseDirective *Dir = createDirective<OMPFuseDirective>(
+ C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc);
+ Dir->setTransformedStmt(TransformedStmt);
+ Dir->setPreInits(PreInits);
+ Dir->setNumGeneratedTopLevelLoops(NumGeneratedTopLevelLoops);
+ return Dir;
+}
+
+OMPFuseDirective *OMPFuseDirective::CreateEmpty(const ASTContext &C,
+ unsigned NumClauses) {
+ OMPFuseDirective *Dir = createEmptyDirective<OMPFuseDirective>(
+ C, NumClauses, /*HasAssociatedStmt=*/true, TransformedStmtOffset + 1,
+ SourceLocation(), SourceLocation());
+ return Dir;
+}
+
OMPForSimdDirective *
OMPForSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc,
SourceLocation EndLoc, unsigned CollapsedNum,
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index 2c9c358..586c300 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -795,6 +795,11 @@ void StmtPrinter::VisitOMPInterchangeDirective(OMPInterchangeDirective *Node) {
PrintOMPExecutableDirective(Node);
}
+void StmtPrinter::VisitOMPFuseDirective(OMPFuseDirective *Node) {
+ Indent() << "#pragma omp fuse";
+ PrintOMPExecutableDirective(Node);
+}
+
void StmtPrinter::VisitOMPForDirective(OMPForDirective *Node) {
Indent() << "#pragma omp for";
PrintOMPExecutableDirective(Node);
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 8b3af94..589a156 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -510,6 +510,13 @@ void OMPClauseProfiler::VisitOMPPartialClause(const OMPPartialClause *C) {
Profiler->VisitExpr(Factor);
}
+void OMPClauseProfiler::VisitOMPLoopRangeClause(const OMPLoopRangeClause *C) {
+ if (const Expr *First = C->getFirst())
+ Profiler->VisitExpr(First);
+ if (const Expr *Count = C->getCount())
+ Profiler->VisitExpr(Count);
+}
+
void OMPClauseProfiler::VisitOMPAllocatorClause(const OMPAllocatorClause *C) {
if (C->getAllocator())
Profiler->VisitStmt(C->getAllocator());
@@ -1025,6 +1032,15 @@ void StmtProfiler::VisitOMPInterchangeDirective(
VisitOMPCanonicalLoopNestTransformationDirective(S);
}
+void StmtProfiler::VisitOMPCanonicalLoopSequenceTransformationDirective(
+ const OMPCanonicalLoopSequenceTransformationDirective *S) {
+ VisitOMPExecutableDirective(S);
+}
+
+void StmtProfiler::VisitOMPFuseDirective(const OMPFuseDirective *S) {
+ VisitOMPCanonicalLoopSequenceTransformationDirective(S);
+}
+
void StmtProfiler::VisitOMPForDirective(const OMPForDirective *S) {
VisitOMPLoopDirective(S);
}
diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp
index 387026e..64b2bff 100644
--- a/clang/lib/Basic/OpenMPKinds.cpp
+++ b/clang/lib/Basic/OpenMPKinds.cpp
@@ -282,6 +282,7 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str,
case OMPC_affinity:
case OMPC_when:
case OMPC_append_args:
+ case OMPC_looprange:
break;
default:
break;
@@ -627,6 +628,7 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind,
case OMPC_affinity:
case OMPC_when:
case OMPC_append_args:
+ case OMPC_looprange:
break;
default:
break;
@@ -755,9 +757,14 @@ bool clang::isOpenMPCanonicalLoopNestTransformationDirective(
DKind == OMPD_interchange || DKind == OMPD_stripe;
}
+bool clang::isOpenMPCanonicalLoopSequenceTransformationDirective(
+ OpenMPDirectiveKind DKind) {
+ return DKind == OMPD_fuse;
+}
+
bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) {
- // FIXME: There will be more cases when we implement 'fuse'.
- return isOpenMPCanonicalLoopNestTransformationDirective(DKind);
+ return isOpenMPCanonicalLoopNestTransformationDirective(DKind) ||
+ isOpenMPCanonicalLoopSequenceTransformationDirective(DKind);
}
bool clang::isOpenMPCombinedParallelADirective(OpenMPDirectiveKind DKind) {
diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp
index 72ee09d..f4d7c12 100644
--- a/clang/lib/Basic/TargetInfo.cpp
+++ b/clang/lib/Basic/TargetInfo.cpp
@@ -18,6 +18,7 @@
#include "clang/Basic/LangOptions.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TargetParser/TargetParser.h"
#include <cstdlib>
@@ -1042,3 +1043,51 @@ void TargetInfo::copyAuxTarget(const TargetInfo *Aux) {
auto *Src = static_cast<const TransferrableTargetInfo*>(Aux);
*Target = *Src;
}
+
+std::string
+TargetInfo::simplifyConstraint(StringRef Constraint,
+ SmallVectorImpl<ConstraintInfo> *OutCons) const {
+ std::string Result;
+
+ for (const char *I = Constraint.begin(), *E = Constraint.end(); I < E; I++) {
+ switch (*I) {
+ default:
+ Result += convertConstraint(I);
+ break;
+ // Ignore these
+ case '*':
+ case '?':
+ case '!':
+ case '=': // Will see this and the following in mult-alt constraints.
+ case '+':
+ break;
+ case '#': // Ignore the rest of the constraint alternative.
+ while (I + 1 != E && I[1] != ',')
+ I++;
+ break;
+ case '&':
+ case '%':
+ Result += *I;
+ while (I + 1 != E && I[1] == *I)
+ I++;
+ break;
+ case ',':
+ Result += "|";
+ break;
+ case 'g':
+ Result += "imr";
+ break;
+ case '[': {
+ assert(OutCons &&
+ "Must pass output names to constraints with a symbolic name");
+ unsigned Index;
+ bool ResolveResult = resolveSymbolicName(I, *OutCons, Index);
+ assert(ResolveResult && "Could not resolve symbolic name");
+ (void)ResolveResult;
+ Result += llvm::utostr(Index);
+ break;
+ }
+ }
+ }
+ return Result;
+}
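A rough, self-contained sketch of the rewriting this helper performs on inline-asm constraint strings (illustrative only; the real path also goes through convertConstraint() and resolveSymbolicName(), whose output is target-dependent):

    #include <cassert>
    #include <string>
    // Hypothetical re-implementation of a few of the rules above.
    static std::string simplifySubset(const std::string &c) {
      std::string out;
      for (char ch : c) {
        switch (ch) {
        case '=': case '+': case '*': case '?': case '!': break; // dropped
        case ',': out += '|'; break;   // multi-alternative separator
        case 'g': out += "imr"; break; // general operand: imm/mem/reg
        default:  out += ch; break;
        }
      }
      return out;
    }
    int main() {
      assert(simplifySubset("g") == "imr");
      assert(simplifySubset("=&r,m") == "&r|m");
      return 0;
    }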
diff --git a/clang/lib/CIR/CodeGen/CIRGenClass.cpp b/clang/lib/CIR/CodeGen/CIRGenClass.cpp
index cb8fe6c..9d12a13 100644
--- a/clang/lib/CIR/CodeGen/CIRGenClass.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenClass.cpp
@@ -951,28 +951,37 @@ Address CIRGenFunction::getAddressOfBaseClass(
bool nullCheckValue, SourceLocation loc) {
assert(!path.empty() && "Base path should not be empty!");
+ CastExpr::path_const_iterator start = path.begin();
+ const CXXRecordDecl *vBase = nullptr;
+
if ((*path.begin())->isVirtual()) {
- // The implementation here is actually complete, but let's flag this
- // as an error until the rest of the virtual base class support is in place.
- cgm.errorNYI(loc, "getAddrOfBaseClass: virtual base");
- return Address::invalid();
+ vBase = (*start)->getType()->castAsCXXRecordDecl();
+ ++start;
}
// Compute the static offset of the ultimate destination within its
// allocating subobject (the virtual base, if there is one, or else
// the "complete" object that we see).
- CharUnits nonVirtualOffset =
- cgm.computeNonVirtualBaseClassOffset(derived, path);
+ CharUnits nonVirtualOffset = cgm.computeNonVirtualBaseClassOffset(
+ vBase ? vBase : derived, {start, path.end()});
+
+ // If there's a virtual step, we can sometimes "devirtualize" it.
+ // For now, that's limited to when the derived type is final.
+ // TODO: "devirtualize" this for accesses to known-complete objects.
+ if (vBase && derived->hasAttr<FinalAttr>()) {
+ const ASTRecordLayout &layout = getContext().getASTRecordLayout(derived);
+ CharUnits vBaseOffset = layout.getVBaseClassOffset(vBase);
+ nonVirtualOffset += vBaseOffset;
+ vBase = nullptr; // we no longer have a virtual step
+ }
// Get the base pointer type.
mlir::Type baseValueTy = convertType((path.end()[-1])->getType());
assert(!cir::MissingFeatures::addressSpace());
- // The if statement here is redundant now, but it will be needed when we add
- // support for virtual base classes.
// If there is no virtual base, use cir.base_class_addr. It takes care of
// the adjustment and the null pointer check.
- if (nonVirtualOffset.isZero()) {
+ if (nonVirtualOffset.isZero() && !vBase) {
assert(!cir::MissingFeatures::sanitizers());
return builder.createBaseClassAddr(getLoc(loc), value, baseValueTy, 0,
/*assumeNotNull=*/true);
@@ -980,10 +989,17 @@ Address CIRGenFunction::getAddressOfBaseClass(
assert(!cir::MissingFeatures::sanitizers());
- // Apply the offset
- value = builder.createBaseClassAddr(getLoc(loc), value, baseValueTy,
- nonVirtualOffset.getQuantity(),
- /*assumeNotNull=*/true);
+ // Compute the virtual offset.
+ mlir::Value virtualOffset = nullptr;
+ if (vBase) {
+ virtualOffset = cgm.getCXXABI().getVirtualBaseClassOffset(
+ getLoc(loc), *this, value, derived, vBase);
+ }
+
+ // Apply both offsets.
+ value = applyNonVirtualAndVirtualOffset(
+ getLoc(loc), *this, value, nonVirtualOffset, virtualOffset, derived,
+ vBase, baseValueTy, not nullCheckValue);
// Cast to the destination type.
value = value.withElementType(builder, baseValueTy);
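A small C++ case that exercises the new "devirtualize the virtual step" path (my own example): because Derived is final, the virtual-base offset is known at compile time and can be folded into the non-virtual offset, so the upcast needs no vtable lookup.

    struct VBase { int v; };
    struct Derived final : virtual VBase { int d; };
    VBase *upcast(Derived *p) { return p; } // constant adjustment, no virtual offset load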
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
index 1f7e3dd..83208bf 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
@@ -210,6 +210,60 @@ RValue CIRGenFunction::emitCXXMemberOrOperatorCall(
return emitCall(fnInfo, callee, returnValue, args, nullptr, loc);
}
+namespace {
+/// The parameters to pass to a usual operator delete.
+struct UsualDeleteParams {
+ TypeAwareAllocationMode typeAwareDelete = TypeAwareAllocationMode::No;
+ bool destroyingDelete = false;
+ bool size = false;
+ AlignedAllocationMode alignment = AlignedAllocationMode::No;
+};
+} // namespace
+
+// FIXME(cir): this should be shared with LLVM codegen
+static UsualDeleteParams getUsualDeleteParams(const FunctionDecl *fd) {
+ UsualDeleteParams params;
+
+ const FunctionProtoType *fpt = fd->getType()->castAs<FunctionProtoType>();
+ auto ai = fpt->param_type_begin(), ae = fpt->param_type_end();
+
+ if (fd->isTypeAwareOperatorNewOrDelete()) {
+ params.typeAwareDelete = TypeAwareAllocationMode::Yes;
+ assert(ai != ae);
+ ++ai;
+ }
+
+ // The first argument after the type-identity parameter (if any) is
+ // always a void* (or C* for a destroying operator delete for class
+ // type C).
+ ++ai;
+
+ // The next parameter may be a std::destroying_delete_t.
+ if (fd->isDestroyingOperatorDelete()) {
+ params.destroyingDelete = true;
+ assert(ai != ae);
+ ++ai;
+ }
+
+ // Figure out what other parameters we should be implicitly passing.
+ if (ai != ae && (*ai)->isIntegerType()) {
+ params.size = true;
+ ++ai;
+ } else {
+ assert(!isTypeAwareAllocation(params.typeAwareDelete));
+ }
+
+ if (ai != ae && (*ai)->isAlignValT()) {
+ params.alignment = AlignedAllocationMode::Yes;
+ ++ai;
+ } else {
+ assert(!isTypeAwareAllocation(params.typeAwareDelete));
+ }
+
+ assert(ai == ae && "unexpected usual deallocation function parameter");
+ return params;
+}
+
static mlir::Value emitCXXNewAllocSize(CIRGenFunction &cgf, const CXXNewExpr *e,
unsigned minElements,
mlir::Value &numElements,
@@ -332,6 +386,117 @@ static RValue emitNewDeleteCall(CIRGenFunction &cgf,
return rv;
}
+namespace {
+/// Calls the given 'operator delete' on a single object.
+struct CallObjectDelete final : EHScopeStack::Cleanup {
+ mlir::Value ptr;
+ const FunctionDecl *operatorDelete;
+ QualType elementType;
+
+ CallObjectDelete(mlir::Value ptr, const FunctionDecl *operatorDelete,
+ QualType elementType)
+ : ptr(ptr), operatorDelete(operatorDelete), elementType(elementType) {}
+
+ void emit(CIRGenFunction &cgf) override {
+ cgf.emitDeleteCall(operatorDelete, ptr, elementType);
+ }
+
+ // This is a placeholder until EHCleanupScope is implemented.
+ size_t getSize() const override {
+ assert(!cir::MissingFeatures::ehCleanupScope());
+ return sizeof(CallObjectDelete);
+ }
+};
+} // namespace
+
+/// Emit the code for deleting a single object.
+static void emitObjectDelete(CIRGenFunction &cgf, const CXXDeleteExpr *de,
+ Address ptr, QualType elementType) {
+ // C++11 [expr.delete]p3:
+ // If the static type of the object to be deleted is different from its
+ // dynamic type, the static type shall be a base class of the dynamic type
+ // of the object to be deleted and the static type shall have a virtual
+ // destructor or the behavior is undefined.
+ assert(!cir::MissingFeatures::emitTypeCheck());
+
+ const FunctionDecl *operatorDelete = de->getOperatorDelete();
+ assert(!operatorDelete->isDestroyingOperatorDelete());
+
+ // Find the destructor for the type, if applicable. If the
+ // destructor is virtual, we'll just emit the vcall and return.
+ const CXXDestructorDecl *dtor = nullptr;
+ if (const auto *rd = elementType->getAsCXXRecordDecl()) {
+ if (rd->hasDefinition() && !rd->hasTrivialDestructor()) {
+ dtor = rd->getDestructor();
+
+ if (dtor->isVirtual()) {
+ cgf.cgm.errorNYI(de->getSourceRange(),
+ "emitObjectDelete: virtual destructor");
+ }
+ }
+ }
+
+ // Make sure that we call delete even if the dtor throws.
+  // This doesn't have to be a conditional cleanup because we're going
+ // to pop it off in a second.
+ cgf.ehStack.pushCleanup<CallObjectDelete>(
+ NormalAndEHCleanup, ptr.getPointer(), operatorDelete, elementType);
+
+ if (dtor) {
+ cgf.emitCXXDestructorCall(dtor, Dtor_Complete,
+ /*ForVirtualBase=*/false,
+ /*Delegating=*/false, ptr, elementType);
+ } else if (elementType.getObjCLifetime()) {
+ assert(!cir::MissingFeatures::objCLifetime());
+ cgf.cgm.errorNYI(de->getSourceRange(), "emitObjectDelete: ObjCLifetime");
+ }
+
+  // In traditional LLVM codegen, null checks are emitted so the delete call
+  // can be skipped for null pointers. In CIR we optimize for size by default,
+  // so the null check should be added by this function's callers.
+ assert(!cir::MissingFeatures::emitNullCheckForDeleteCalls());
+
+ cgf.popCleanupBlock();
+}
+
+void CIRGenFunction::emitCXXDeleteExpr(const CXXDeleteExpr *e) {
+ const Expr *arg = e->getArgument();
+ Address ptr = emitPointerWithAlignment(arg);
+
+ // Null check the pointer.
+ //
+ // We could avoid this null check if we can determine that the object
+ // destruction is trivial and doesn't require an array cookie; we can
+ // unconditionally perform the operator delete call in that case. For now, we
+ // assume that deleted pointers are null rarely enough that it's better to
+ // keep the branch. This might be worth revisiting for a -O0 code size win.
+ //
+  // CIR note: for now, default to the code-size-friendly form, as noted in
+  // `emitObjectDelete`.
+ assert(!cir::MissingFeatures::emitNullCheckForDeleteCalls());
+ QualType deleteTy = e->getDestroyedType();
+
+ // A destroying operator delete overrides the entire operation of the
+ // delete expression.
+ if (e->getOperatorDelete()->isDestroyingOperatorDelete()) {
+ cgm.errorNYI(e->getSourceRange(),
+ "emitCXXDeleteExpr: destroying operator delete");
+ return;
+ }
+
+ // We might be deleting a pointer to array.
+ deleteTy = getContext().getBaseElementType(deleteTy);
+ ptr = ptr.withElementType(builder, convertTypeForMem(deleteTy));
+
+ if (e->isArrayForm()) {
+ assert(!cir::MissingFeatures::deleteArray());
+ cgm.errorNYI(e->getSourceRange(), "emitCXXDeleteExpr: array delete");
+ return;
+ } else {
+ emitObjectDelete(*this, e, ptr, deleteTy);
+ }
+}
+
mlir::Value CIRGenFunction::emitCXXNewExpr(const CXXNewExpr *e) {
// The element type being allocated.
QualType allocType = getContext().getBaseElementType(e->getAllocatedType());
@@ -443,3 +608,53 @@ mlir::Value CIRGenFunction::emitCXXNewExpr(const CXXNewExpr *e) {
allocSizeWithoutCookie);
return result.getPointer();
}
+
+void CIRGenFunction::emitDeleteCall(const FunctionDecl *deleteFD,
+ mlir::Value ptr, QualType deleteTy) {
+ assert(!cir::MissingFeatures::deleteArray());
+
+ const auto *deleteFTy = deleteFD->getType()->castAs<FunctionProtoType>();
+ CallArgList deleteArgs;
+
+ UsualDeleteParams params = getUsualDeleteParams(deleteFD);
+ auto paramTypeIt = deleteFTy->param_type_begin();
+
+ // Pass std::type_identity tag if present
+ if (isTypeAwareAllocation(params.typeAwareDelete))
+ cgm.errorNYI(deleteFD->getSourceRange(),
+ "emitDeleteCall: type aware delete");
+
+ // Pass the pointer itself.
+ QualType argTy = *paramTypeIt++;
+ mlir::Value deletePtr =
+ builder.createBitcast(ptr.getLoc(), ptr, convertType(argTy));
+ deleteArgs.add(RValue::get(deletePtr), argTy);
+
+ // Pass the std::destroying_delete tag if present.
+ if (params.destroyingDelete)
+ cgm.errorNYI(deleteFD->getSourceRange(),
+ "emitDeleteCall: destroying delete");
+
+ // Pass the size if the delete function has a size_t parameter.
+ if (params.size) {
+ QualType sizeType = *paramTypeIt++;
+ CharUnits deleteTypeSize = getContext().getTypeSizeInChars(deleteTy);
+ assert(mlir::isa<cir::IntType>(convertType(sizeType)) &&
+ "expected cir::IntType");
+ cir::ConstantOp size = builder.getConstInt(
+ *currSrcLoc, convertType(sizeType), deleteTypeSize.getQuantity());
+
+ deleteArgs.add(RValue::get(size), sizeType);
+ }
+
+ // Pass the alignment if the delete function has an align_val_t parameter.
+ if (isAlignedAllocation(params.alignment))
+ cgm.errorNYI(deleteFD->getSourceRange(),
+ "emitDeleteCall: aligned allocation");
+
+ assert(paramTypeIt == deleteFTy->param_type_end() &&
+ "unknown parameter to usual delete function");
+
+ // Emit the call to delete.
+ emitNewDeleteCall(*this, deleteFD, deleteFTy, deleteArgs);
+}
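For illustration, the kinds of usual deallocation functions that getUsualDeleteParams classifies and emitDeleteCall then calls (sketch; the signatures follow the standard forms, not anything specific to this patch):

    #include <cstddef>
    #include <new>
    struct S {
      // params.size = true: the object size is passed implicitly.
      static void operator delete(void *p, std::size_t sz) noexcept;
      // For an over-aligned type, an align_val_t parameter would set
      // params.alignment = AlignedAllocationMode::Yes (still NYI above):
      // static void operator delete(void *p, std::size_t sz, std::align_val_t a) noexcept;
    };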
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
index bd09d78..f4bbced 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -676,6 +676,10 @@ public:
mlir::Value VisitRealImag(const UnaryOperator *e,
QualType promotionType = QualType());
+ mlir::Value VisitUnaryExtension(const UnaryOperator *e) {
+ return Visit(e->getSubExpr());
+ }
+
mlir::Value VisitCXXDefaultInitExpr(CXXDefaultInitExpr *die) {
CIRGenFunction::CXXDefaultInitExprScope scope(cgf, die);
return Visit(die->getExpr());
@@ -687,6 +691,10 @@ public:
mlir::Value VisitCXXNewExpr(const CXXNewExpr *e) {
return cgf.emitCXXNewExpr(e);
}
+ mlir::Value VisitCXXDeleteExpr(const CXXDeleteExpr *e) {
+ cgf.emitCXXDeleteExpr(e);
+ return {};
+ }
mlir::Value VisitCXXThrowExpr(const CXXThrowExpr *e) {
cgf.emitCXXThrowExpr(e);
@@ -1274,9 +1282,6 @@ mlir::Value ScalarExprEmitter::emitPromoted(const Expr *e,
} else if (const auto *uo = dyn_cast<UnaryOperator>(e)) {
switch (uo->getOpcode()) {
case UO_Imag:
- cgf.cgm.errorNYI(e->getSourceRange(),
- "ScalarExprEmitter::emitPromoted unary imag");
- return {};
case UO_Real:
return VisitRealImag(uo, promotionType);
case UO_Minus:
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index 166435f..ef07db3 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1197,6 +1197,8 @@ public:
bool delegating, Address thisAddr,
CallArgList &args, clang::SourceLocation loc);
+ void emitCXXDeleteExpr(const CXXDeleteExpr *e);
+
void emitCXXDestructorCall(const CXXDestructorDecl *dd, CXXDtorType type,
bool forVirtualBase, bool delegating,
Address thisAddr, QualType thisTy);
@@ -1244,6 +1246,9 @@ public:
void emitDelegatingCXXConstructorCall(const CXXConstructorDecl *ctor,
const FunctionArgList &args);
+ void emitDeleteCall(const FunctionDecl *deleteFD, mlir::Value ptr,
+ QualType deleteTy);
+
mlir::LogicalResult emitDoStmt(const clang::DoStmt &s);
/// Emit an expression as an initializer for an object (variable, field, etc.)
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index eef23a0..c977ff9 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -119,6 +119,19 @@ CIRGenModule::CIRGenModule(mlir::MLIRContext &mlirContext,
cir::OptInfoAttr::get(&mlirContext,
cgo.OptimizationLevel,
cgo.OptimizeSize));
+ // Set the module name to be the name of the main file. TranslationUnitDecl
+ // often contains invalid source locations and isn't a reliable source for the
+ // module location.
+ FileID mainFileId = astContext.getSourceManager().getMainFileID();
+ const FileEntry &mainFile =
+ *astContext.getSourceManager().getFileEntryForID(mainFileId);
+ StringRef path = mainFile.tryGetRealPathName();
+ if (!path.empty()) {
+ theModule.setSymName(path);
+ theModule->setLoc(mlir::FileLineColLoc::get(&mlirContext, path,
+ /*line=*/0,
+ /*column=*/0));
+ }
}
CIRGenModule::~CIRGenModule() = default;
diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
index e842892..644c383 100644
--- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
@@ -216,6 +216,7 @@ mlir::LogicalResult CIRGenFunction::emitStmt(const Stmt *s,
case Stmt::OMPSimdDirectiveClass:
case Stmt::OMPTileDirectiveClass:
case Stmt::OMPUnrollDirectiveClass:
+ case Stmt::OMPFuseDirectiveClass:
case Stmt::OMPForDirectiveClass:
case Stmt::OMPForSimdDirectiveClass:
case Stmt::OMPSectionsDirectiveClass:
diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
index 58ef500..fb87036 100644
--- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
@@ -1355,9 +1355,11 @@ mlir::LogicalResult cir::GlobalOp::verify() {
return success();
}
-void cir::GlobalOp::build(OpBuilder &odsBuilder, OperationState &odsState,
- llvm::StringRef sym_name, mlir::Type sym_type,
- bool isConstant, cir::GlobalLinkageKind linkage) {
+void cir::GlobalOp::build(
+ OpBuilder &odsBuilder, OperationState &odsState, llvm::StringRef sym_name,
+ mlir::Type sym_type, bool isConstant, cir::GlobalLinkageKind linkage,
+ function_ref<void(OpBuilder &, Location)> ctorBuilder,
+ function_ref<void(OpBuilder &, Location)> dtorBuilder) {
odsState.addAttribute(getSymNameAttrName(odsState.name),
odsBuilder.getStringAttr(sym_name));
odsState.addAttribute(getSymTypeAttrName(odsState.name),
@@ -1370,26 +1372,88 @@ void cir::GlobalOp::build(OpBuilder &odsBuilder, OperationState &odsState,
cir::GlobalLinkageKindAttr::get(odsBuilder.getContext(), linkage);
odsState.addAttribute(getLinkageAttrName(odsState.name), linkageAttr);
+ Region *ctorRegion = odsState.addRegion();
+ if (ctorBuilder) {
+ odsBuilder.createBlock(ctorRegion);
+ ctorBuilder(odsBuilder, odsState.location);
+ }
+
+ Region *dtorRegion = odsState.addRegion();
+ if (dtorBuilder) {
+ odsBuilder.createBlock(dtorRegion);
+ dtorBuilder(odsBuilder, odsState.location);
+ }
+
odsState.addAttribute(getGlobalVisibilityAttrName(odsState.name),
cir::VisibilityAttr::get(odsBuilder.getContext()));
}
+/// Return the regions that may be executed next from `point`. From the
+/// parent operation, control may enter the non-empty `ctor` and `dtor`
+/// regions; from either region, control flows back to the parent.
+void cir::GlobalOp::getSuccessorRegions(
+ mlir::RegionBranchPoint point, SmallVectorImpl<RegionSuccessor> &regions) {
+ // The `ctor` and `dtor` regions always branch back to the parent operation.
+ if (!point.isParent()) {
+ regions.push_back(RegionSuccessor());
+ return;
+ }
+
+ // Don't consider the ctor region if it is empty.
+ Region *ctorRegion = &this->getCtorRegion();
+ if (ctorRegion->empty())
+ ctorRegion = nullptr;
+
+ // Don't consider the dtor region if it is empty.
+  Region *dtorRegion = &this->getDtorRegion();
+ if (dtorRegion->empty())
+ dtorRegion = nullptr;
+
+  // From the parent, any non-empty ctor/dtor region may be entered.
+ if (ctorRegion)
+ regions.push_back(RegionSuccessor(ctorRegion));
+ if (dtorRegion)
+ regions.push_back(RegionSuccessor(dtorRegion));
+}
+
static void printGlobalOpTypeAndInitialValue(OpAsmPrinter &p, cir::GlobalOp op,
- TypeAttr type,
- Attribute initAttr) {
+ TypeAttr type, Attribute initAttr,
+ mlir::Region &ctorRegion,
+ mlir::Region &dtorRegion) {
+ auto printType = [&]() { p << ": " << type; };
if (!op.isDeclaration()) {
p << "= ";
- // This also prints the type...
- if (initAttr)
- printConstant(p, initAttr);
+ if (!ctorRegion.empty()) {
+ p << "ctor ";
+ printType();
+ p << " ";
+ p.printRegion(ctorRegion,
+ /*printEntryBlockArgs=*/false,
+ /*printBlockTerminators=*/false);
+ } else {
+ // This also prints the type...
+ if (initAttr)
+ printConstant(p, initAttr);
+ }
+
+ if (!dtorRegion.empty()) {
+ p << " dtor ";
+ p.printRegion(dtorRegion,
+ /*printEntryBlockArgs=*/false,
+ /*printBlockTerminators=*/false);
+ }
} else {
- p << ": " << type;
+ printType();
}
}
-static ParseResult
-parseGlobalOpTypeAndInitialValue(OpAsmParser &parser, TypeAttr &typeAttr,
- Attribute &initialValueAttr) {
+static ParseResult parseGlobalOpTypeAndInitialValue(OpAsmParser &parser,
+ TypeAttr &typeAttr,
+ Attribute &initialValueAttr,
+ mlir::Region &ctorRegion,
+ mlir::Region &dtorRegion) {
mlir::Type opTy;
if (parser.parseOptionalEqual().failed()) {
// Absence of equal means a declaration, so we need to parse the type.
@@ -1397,16 +1461,38 @@ parseGlobalOpTypeAndInitialValue(OpAsmParser &parser, TypeAttr &typeAttr,
if (parser.parseColonType(opTy))
return failure();
} else {
- // Parse constant with initializer, examples:
- // cir.global @y = #cir.fp<1.250000e+00> : !cir.double
- // cir.global @rgb = #cir.const_array<[...] : !cir.array<i8 x 3>>
- if (parseConstantValue(parser, initialValueAttr).failed())
- return failure();
+    // Parse constructor, example:
+ // cir.global @rgb = ctor : type { ... }
+ if (!parser.parseOptionalKeyword("ctor")) {
+ if (parser.parseColonType(opTy))
+ return failure();
+ auto parseLoc = parser.getCurrentLocation();
+ if (parser.parseRegion(ctorRegion, /*arguments=*/{}, /*argTypes=*/{}))
+ return failure();
+ if (ensureRegionTerm(parser, ctorRegion, parseLoc).failed())
+ return failure();
+ } else {
+ // Parse constant with initializer, examples:
+ // cir.global @y = 3.400000e+00 : f32
+ // cir.global @rgb = #cir.const_array<[...] : !cir.array<i8 x 3>>
+ if (parseConstantValue(parser, initialValueAttr).failed())
+ return failure();
+
+ assert(mlir::isa<mlir::TypedAttr>(initialValueAttr) &&
+ "Non-typed attrs shouldn't appear here.");
+ auto typedAttr = mlir::cast<mlir::TypedAttr>(initialValueAttr);
+ opTy = typedAttr.getType();
+ }
- assert(mlir::isa<mlir::TypedAttr>(initialValueAttr) &&
- "Non-typed attrs shouldn't appear here.");
- auto typedAttr = mlir::cast<mlir::TypedAttr>(initialValueAttr);
- opTy = typedAttr.getType();
+ // Parse destructor, example:
+ // dtor { ... }
+ if (!parser.parseOptionalKeyword("dtor")) {
+ auto parseLoc = parser.getCurrentLocation();
+ if (parser.parseRegion(dtorRegion, /*arguments=*/{}, /*argTypes=*/{}))
+ return failure();
+ if (ensureRegionTerm(parser, dtorRegion, parseLoc).failed())
+ return failure();
+ }
}
typeAttr = TypeAttr::get(opTy);
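For context, a C++ global that needs the new ctor/dtor regions (my own example); per the parser comments above, such a global would round-trip roughly as `cir.global @g = ctor : !ty { ... } dtor { ... }` once CIRGen emits dynamic initialization.

    struct Logger {
      Logger();   // runs at program start -> ctor region
      ~Logger();  // runs at program exit  -> dtor region
    };
    Logger g; // requires dynamic initialization and destruction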
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 57db20f7..64f1917 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -1090,8 +1090,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
if (std::optional<GCOVOptions> Options =
getGCOVOptions(CodeGenOpts, LangOpts))
PB.registerPipelineStartEPCallback(
- [Options](ModulePassManager &MPM, OptimizationLevel Level) {
- MPM.addPass(GCOVProfilerPass(*Options));
+ [this, Options](ModulePassManager &MPM, OptimizationLevel Level) {
+ MPM.addPass(
+ GCOVProfilerPass(*Options, CI.getVirtualFileSystemPtr()));
});
if (std::optional<InstrProfOptions> Options =
getInstrProfOptions(CodeGenOpts, LangOpts))
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 12c7d48..fee6bc0 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -26,6 +26,7 @@
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
+#include "clang/AST/LambdaCapture.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/VTableBuilder.h"
@@ -1903,46 +1904,61 @@ CGDebugInfo::createInlinedSubprogram(StringRef FuncName,
return SP;
}
+llvm::StringRef
+CGDebugInfo::GetLambdaCaptureName(const LambdaCapture &Capture) {
+ if (Capture.capturesThis())
+ return CGM.getCodeGenOpts().EmitCodeView ? "__this" : "this";
+
+ assert(Capture.capturesVariable());
+
+ const ValueDecl *CaptureDecl = Capture.getCapturedVar();
+ assert(CaptureDecl && "Expected valid decl for captured variable.");
+
+ return CaptureDecl->getName();
+}
+
void CGDebugInfo::CollectRecordLambdaFields(
const CXXRecordDecl *CXXDecl, SmallVectorImpl<llvm::Metadata *> &elements,
llvm::DIType *RecordTy) {
// For C++11 Lambdas a Field will be the same as a Capture, but the Capture
// has the name and the location of the variable so we should iterate over
// both concurrently.
- const ASTRecordLayout &layout = CGM.getContext().getASTRecordLayout(CXXDecl);
RecordDecl::field_iterator Field = CXXDecl->field_begin();
unsigned fieldno = 0;
for (CXXRecordDecl::capture_const_iterator I = CXXDecl->captures_begin(),
E = CXXDecl->captures_end();
I != E; ++I, ++Field, ++fieldno) {
- const LambdaCapture &C = *I;
- if (C.capturesVariable()) {
- SourceLocation Loc = C.getLocation();
- assert(!Field->isBitField() && "lambdas don't have bitfield members!");
- ValueDecl *V = C.getCapturedVar();
- StringRef VName = V->getName();
- llvm::DIFile *VUnit = getOrCreateFile(Loc);
- auto Align = getDeclAlignIfRequired(V, CGM.getContext());
- llvm::DIType *FieldType = createFieldType(
- VName, Field->getType(), Loc, Field->getAccess(),
- layout.getFieldOffset(fieldno), Align, VUnit, RecordTy, CXXDecl);
- elements.push_back(FieldType);
- } else if (C.capturesThis()) {
+ const LambdaCapture &Capture = *I;
+ const uint64_t FieldOffset =
+ CGM.getContext().getASTRecordLayout(CXXDecl).getFieldOffset(fieldno);
+
+ assert(!Field->isBitField() && "lambdas don't have bitfield members!");
+
+ SourceLocation Loc;
+ uint32_t Align = 0;
+
+ if (Capture.capturesThis()) {
// TODO: Need to handle 'this' in some way by probably renaming the
// this of the lambda class and having a field member of 'this' or
// by using AT_object_pointer for the function and having that be
// used as 'this' for semantic references.
- FieldDecl *f = *Field;
- llvm::DIFile *VUnit = getOrCreateFile(f->getLocation());
- QualType type = f->getType();
- StringRef ThisName =
- CGM.getCodeGenOpts().EmitCodeView ? "__this" : "this";
- llvm::DIType *fieldType = createFieldType(
- ThisName, type, f->getLocation(), f->getAccess(),
- layout.getFieldOffset(fieldno), VUnit, RecordTy, CXXDecl);
-
- elements.push_back(fieldType);
+ Loc = Field->getLocation();
+ } else if (Capture.capturesVariable()) {
+ Loc = Capture.getLocation();
+
+ const ValueDecl *CaptureDecl = Capture.getCapturedVar();
+ assert(CaptureDecl && "Expected valid decl for captured variable.");
+
+ Align = getDeclAlignIfRequired(CaptureDecl, CGM.getContext());
+ } else {
+ continue;
}
+
+ llvm::DIFile *VUnit = getOrCreateFile(Loc);
+
+ elements.push_back(createFieldType(
+ GetLambdaCaptureName(Capture), Field->getType(), Loc,
+ Field->getAccess(), FieldOffset, Align, VUnit, RecordTy, CXXDecl));
}
}
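A small example of the captures whose debug-info field names GetLambdaCaptureName now produces (my own illustration): a by-value capture is named after the captured variable, and the this capture is named "this", or "__this" when emitting CodeView.

    struct Widget {
      int member = 0;
      auto makeClosure() {
        int x = 42;
        return [this, x] { return x + member; }; // debug fields: "x" and "this"/"__this"
      }
    };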
diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h
index f860773..78c3eb9 100644
--- a/clang/lib/CodeGen/CGDebugInfo.h
+++ b/clang/lib/CodeGen/CGDebugInfo.h
@@ -397,6 +397,7 @@ private:
void CollectRecordFields(const RecordDecl *Decl, llvm::DIFile *F,
SmallVectorImpl<llvm::Metadata *> &E,
llvm::DICompositeType *RecordTy);
+ llvm::StringRef GetLambdaCaptureName(const LambdaCapture &Capture);
/// If the C++ class has vtable info then insert appropriate debug
/// info entry in EltTys vector.
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 4401006..92636f2 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -234,6 +234,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
case Stmt::OMPInterchangeDirectiveClass:
EmitOMPInterchangeDirective(cast<OMPInterchangeDirective>(*S));
break;
+ case Stmt::OMPFuseDirectiveClass:
+ EmitOMPFuseDirective(cast<OMPFuseDirective>(*S));
+ break;
case Stmt::OMPForDirectiveClass:
EmitOMPForDirective(cast<OMPForDirective>(*S));
break;
@@ -2471,56 +2474,6 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) {
CaseRangeBlock = SavedCRBlock;
}
-static std::string
-SimplifyConstraint(const char *Constraint, const TargetInfo &Target,
- SmallVectorImpl<TargetInfo::ConstraintInfo> *OutCons=nullptr) {
- std::string Result;
-
- while (*Constraint) {
- switch (*Constraint) {
- default:
- Result += Target.convertConstraint(Constraint);
- break;
- // Ignore these
- case '*':
- case '?':
- case '!':
- case '=': // Will see this and the following in mult-alt constraints.
- case '+':
- break;
- case '#': // Ignore the rest of the constraint alternative.
- while (Constraint[1] && Constraint[1] != ',')
- Constraint++;
- break;
- case '&':
- case '%':
- Result += *Constraint;
- while (Constraint[1] && Constraint[1] == *Constraint)
- Constraint++;
- break;
- case ',':
- Result += "|";
- break;
- case 'g':
- Result += "imr";
- break;
- case '[': {
- assert(OutCons &&
- "Must pass output names to constraints with a symbolic name");
- unsigned Index;
- bool result = Target.resolveSymbolicName(Constraint, *OutCons, Index);
- assert(result && "Could not resolve symbolic name"); (void)result;
- Result += llvm::utostr(Index);
- break;
- }
- }
-
- Constraint++;
- }
-
- return Result;
-}
-
/// AddVariableConstraints - Look at AsmExpr and if it is a variable declared
/// as using a particular register add that as a constraint that will be used
/// in this asm stmt.
@@ -2899,8 +2852,8 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
// Simplify the output constraint.
std::string OutputConstraint(S.getOutputConstraint(i));
- OutputConstraint = SimplifyConstraint(OutputConstraint.c_str() + 1,
- getTarget(), &OutputConstraintInfos);
+ OutputConstraint = getTarget().simplifyConstraint(
+ StringRef(OutputConstraint).substr(1), &OutputConstraintInfos);
const Expr *OutExpr = S.getOutputExpr(i);
OutExpr = OutExpr->IgnoreParenNoopCasts(getContext());
@@ -3062,8 +3015,8 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
// Simplify the input constraint.
std::string InputConstraint(S.getInputConstraint(i));
- InputConstraint = SimplifyConstraint(InputConstraint.c_str(), getTarget(),
- &OutputConstraintInfos);
+ InputConstraint =
+ getTarget().simplifyConstraint(InputConstraint, &OutputConstraintInfos);
InputConstraint = AddVariableConstraints(
InputConstraint, *InputExpr->IgnoreParenNoopCasts(getContext()),
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index ba9c7c6..efc06a2 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -201,6 +201,24 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
} else {
llvm_unreachable("Unknown loop-based directive kind.");
}
+ doEmitPreinits(PreInits);
+ PreCondVars.restore(CGF);
+ }
+
+ void
+ emitPreInitStmt(CodeGenFunction &CGF,
+ const OMPCanonicalLoopSequenceTransformationDirective &S) {
+ const Stmt *PreInits;
+ if (const auto *Fuse = dyn_cast<OMPFuseDirective>(&S)) {
+ PreInits = Fuse->getPreInits();
+ } else {
+ llvm_unreachable(
+ "Unknown canonical loop sequence transform directive kind.");
+ }
+ doEmitPreinits(PreInits);
+ }
+
+ void doEmitPreinits(const Stmt *PreInits) {
if (PreInits) {
// CompoundStmts and DeclStmts are used as lists of PreInit statements and
      // declarations. Since declarations must be visible in the following
@@ -222,7 +240,6 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
CGF.EmitStmt(S);
}
}
- PreCondVars.restore(CGF);
}
public:
@@ -230,6 +247,11 @@ public:
: CodeGenFunction::RunCleanupsScope(CGF) {
emitPreInitStmt(CGF, S);
}
+ OMPLoopScope(CodeGenFunction &CGF,
+ const OMPCanonicalLoopSequenceTransformationDirective &S)
+ : CodeGenFunction::RunCleanupsScope(CGF) {
+ emitPreInitStmt(CGF, S);
+ }
};
class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
@@ -1929,6 +1951,15 @@ public:
CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
}
+ if (const auto *Dir =
+ dyn_cast<OMPCanonicalLoopSequenceTransformationDirective>(S)) {
+      // For simplicity we reuse the loop scope here, similarly to what is done
+      // for OMPCanonicalLoopNestTransformationDirective, which gets it by
+      // being a subclass of OMPLoopBasedDirective.
+ Scope = new OMPLoopScope(CGF, *Dir);
+ CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
+ CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
+ }
}
~OMPTransformDirectiveScopeRAII() {
if (!Scope)
@@ -1956,8 +1987,7 @@ static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
return;
}
if (SimplifiedS == NextLoop) {
- if (auto *Dir =
- dyn_cast<OMPCanonicalLoopNestTransformationDirective>(SimplifiedS))
+ if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(SimplifiedS))
SimplifiedS = Dir->getTransformedStmt();
if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
SimplifiedS = CanonLoop->getLoopStmt();
@@ -2952,6 +2982,12 @@ void CodeGenFunction::EmitOMPInterchangeDirective(
EmitStmt(S.getTransformedStmt());
}
+void CodeGenFunction::EmitOMPFuseDirective(const OMPFuseDirective &S) {
+ // Emit the de-sugared statement
+ OMPTransformDirectiveScopeRAII FuseScope(*this, &S);
+ EmitStmt(S.getTransformedStmt());
+}
+
void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;
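Conceptually, emitting the transformed statement means the fused loop sequence is code-generated as a single loop. A sketch of the intent (not the exact AST Sema builds; the real transformed statement also guards loops with differing trip counts):

    void fusedBody(int n, int *a, int *b) {
      // What "#pragma omp fuse" over two loops with equal trip counts
      // roughly de-sugars to:
      for (int i = 0; i < n; ++i) {
        a[i] = 0; // body of the first loop
        b[i] = 1; // body of the second loop
      }
    }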
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 727487b..f0565c1 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3861,6 +3861,7 @@ public:
void EmitOMPUnrollDirective(const OMPUnrollDirective &S);
void EmitOMPReverseDirective(const OMPReverseDirective &S);
void EmitOMPInterchangeDirective(const OMPInterchangeDirective &S);
+ void EmitOMPFuseDirective(const OMPFuseDirective &S);
void EmitOMPForDirective(const OMPForDirective &S);
void EmitOMPForSimdDirective(const OMPForSimdDirective &S);
void EmitOMPScopeDirective(const OMPScopeDirective &S);
diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp
index 98b30e08..8f09564 100644
--- a/clang/lib/CodeGen/CodeGenPGO.cpp
+++ b/clang/lib/CodeGen/CodeGenPGO.cpp
@@ -972,7 +972,7 @@ void PGOHash::combine(HashType Type) {
if (Count && Count % NumTypesPerWord == 0) {
using namespace llvm::support;
uint64_t Swapped =
- endian::byte_swap<uint64_t, llvm::endianness::little>(Working);
+ endian::byte_swap<uint64_t>(Working, llvm::endianness::little);
MD5.update(llvm::ArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
Working = 0;
}
@@ -999,7 +999,7 @@ uint64_t PGOHash::finalize() {
} else {
using namespace llvm::support;
uint64_t Swapped =
- endian::byte_swap<uint64_t, llvm::endianness::little>(Working);
+ endian::byte_swap<uint64_t>(Working, llvm::endianness::little);
MD5.update(llvm::ArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
}
}
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 07cf08c..6596ec0 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -192,9 +192,17 @@ static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
return CGF.Builder.CreateCall(F, {Src0, Src1});
}
+static inline StringRef mapScopeToSPIRV(StringRef AMDGCNScope) {
+ if (AMDGCNScope == "agent")
+ return "device";
+ if (AMDGCNScope == "wavefront")
+ return "subgroup";
+ return AMDGCNScope;
+}
+
// For processing memory ordering and memory scope arguments of various
// amdgcn builtins.
-// \p Order takes a C++11 comptabile memory-ordering specifier and converts
+// \p Order takes a C++11 compatible memory-ordering specifier and converts
// it into LLVM's memory ordering specifier using atomic C ABI, and writes
// to \p AO. \p Scope takes a const char * and converts it into AMDGCN
// specific SyncScopeID and writes it to \p SSID.
@@ -227,6 +235,8 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
// Some of the atomic builtins take the scope as a string name.
StringRef scp;
if (llvm::getConstantStringInfo(Scope, scp)) {
+ if (getTarget().getTriple().isSPIRV())
+ scp = mapScopeToSPIRV(scp);
SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
return;
}
@@ -238,13 +248,19 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
SSID = llvm::SyncScope::System;
break;
case 1: // __MEMORY_SCOPE_DEVICE
- SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
+ if (getTarget().getTriple().isSPIRV())
+ SSID = getLLVMContext().getOrInsertSyncScopeID("device");
+ else
+ SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
break;
case 2: // __MEMORY_SCOPE_WRKGRP
SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
break;
case 3: // __MEMORY_SCOPE_WVFRNT
- SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
+ if (getTarget().getTriple().isSPIRV())
+ SSID = getLLVMContext().getOrInsertSyncScopeID("subgroup");
+ else
+ SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
break;
case 4: // __MEMORY_SCOPE_SINGLE
SSID = llvm::SyncScope::SingleThread;
@@ -1510,7 +1526,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
//
// The global/flat cases need to use agent scope to consistently produce
// the native instruction instead of a cmpxchg expansion.
- SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
+ if (getTarget().getTriple().isSPIRV())
+ SSID = getLLVMContext().getOrInsertSyncScopeID("device");
+ else
+ SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
AO = AtomicOrdering::Monotonic;
// The v2bf16 builtin uses i16 instead of a natural bfloat type.
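A hypothetical usage that goes through this path (the builtin and the AMDGCN scope strings exist today; remapping them to the SPIR-V "device"/"subgroup" scopes is what this hunk adds):

    void releaseFence() {
      // "agent" is the AMDGCN scope name; when targeting SPIR-V it is now
      // remapped to "device" before forming the SyncScopeID.
      __builtin_amdgcn_fence(__ATOMIC_RELEASE, "agent");
    }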
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index f110dba..85a13357 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -6613,6 +6613,9 @@ std::string Driver::GetStdModuleManifestPath(const Compilation &C,
const ToolChain &TC) const {
std::string error = "<NOT PRESENT>";
+ if (C.getArgs().hasArg(options::OPT_nostdlib))
+ return error;
+
switch (TC.GetCXXStdlibType(C.getArgs())) {
case ToolChain::CST_Libcxx: {
auto evaluate = [&](const char *library) -> std::optional<std::string> {
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index e04b0e7..a28446a 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -55,7 +55,7 @@ namespace format {
TYPE(ConflictAlternative) \
TYPE(ConflictEnd) \
TYPE(ConflictStart) \
- /* l_brace of if/for/while */ \
+ /* l_brace of if/for/while/switch/catch */ \
TYPE(ControlStatementLBrace) \
TYPE(ControlStatementRBrace) \
TYPE(CppCastLParen) \
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 6a8286d..0c9c88a 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -833,11 +833,6 @@ private:
if (Parent && Parent->is(TT_PointerOrReference))
Parent->overwriteFixedType(TT_BinaryOperator);
}
- // An arrow after an ObjC method expression is not a lambda arrow.
- if (CurrentToken->is(TT_ObjCMethodExpr) && CurrentToken->Next &&
- CurrentToken->Next->is(TT_LambdaArrow)) {
- CurrentToken->Next->overwriteFixedType(TT_Unknown);
- }
Left->MatchingParen = CurrentToken;
CurrentToken->MatchingParen = Left;
// FirstObjCSelectorName is set when a colon is found. This does
@@ -4026,29 +4021,28 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) const {
}
}
- if (IsCpp &&
- (LineIsFunctionDeclaration ||
- (FirstNonComment && FirstNonComment->is(TT_CtorDtorDeclName))) &&
- Line.endsWith(tok::semi, tok::r_brace)) {
- auto *Tok = Line.Last->Previous;
- while (Tok->isNot(tok::r_brace))
- Tok = Tok->Previous;
- if (auto *LBrace = Tok->MatchingParen; LBrace && LBrace->is(TT_Unknown)) {
- assert(LBrace->is(tok::l_brace));
- Tok->setBlockKind(BK_Block);
- LBrace->setBlockKind(BK_Block);
- LBrace->setFinalizedType(TT_FunctionLBrace);
+ if (IsCpp) {
+ if ((LineIsFunctionDeclaration ||
+ (FirstNonComment && FirstNonComment->is(TT_CtorDtorDeclName))) &&
+ Line.endsWith(tok::semi, tok::r_brace)) {
+ auto *Tok = Line.Last->Previous;
+ while (Tok->isNot(tok::r_brace))
+ Tok = Tok->Previous;
+ if (auto *LBrace = Tok->MatchingParen; LBrace && LBrace->is(TT_Unknown)) {
+ assert(LBrace->is(tok::l_brace));
+ Tok->setBlockKind(BK_Block);
+ LBrace->setBlockKind(BK_Block);
+ LBrace->setFinalizedType(TT_FunctionLBrace);
+ }
}
- }
- if (IsCpp && SeenName && AfterLastAttribute &&
- mustBreakAfterAttributes(*AfterLastAttribute, Style)) {
- AfterLastAttribute->MustBreakBefore = true;
- if (LineIsFunctionDeclaration)
- Line.ReturnTypeWrapped = true;
- }
+ if (SeenName && AfterLastAttribute &&
+ mustBreakAfterAttributes(*AfterLastAttribute, Style)) {
+ AfterLastAttribute->MustBreakBefore = true;
+ if (LineIsFunctionDeclaration)
+ Line.ReturnTypeWrapped = true;
+ }
- if (IsCpp) {
if (!LineIsFunctionDeclaration) {
// Annotate */&/&& in `operator` function calls as binary operators.
for (const auto *Tok = FirstNonComment; Tok; Tok = Tok->Next) {
@@ -4094,6 +4088,11 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) const {
}
}
+ if (First->is(TT_ElseLBrace)) {
+ First->CanBreakBefore = true;
+ First->MustBreakBefore = true;
+ }
+
bool InFunctionDecl = Line.MightBeFunctionDecl;
bool InParameterList = false;
for (auto *Current = First->Next; Current; Current = Current->Next) {
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 2c9766c9..6948b3d 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -2268,7 +2268,7 @@ bool UnwrappedLineParser::tryToParseLambda() {
if (!tryToParseLambdaIntroducer())
return false;
- bool SeenArrow = false;
+ FormatToken *Arrow = nullptr;
bool InTemplateParameterList = false;
while (FormatTok->isNot(tok::l_brace)) {
@@ -2343,17 +2343,13 @@ bool UnwrappedLineParser::tryToParseLambda() {
case tok::ellipsis:
case tok::kw_true:
case tok::kw_false:
- if (SeenArrow || InTemplateParameterList) {
+ if (Arrow || InTemplateParameterList) {
nextToken();
break;
}
return true;
case tok::arrow:
- // This might or might not actually be a lambda arrow (this could be an
- // ObjC method invocation followed by a dereferencing arrow). We might
- // reset this back to TT_Unknown in TokenAnnotator.
- FormatTok->setFinalizedType(TT_LambdaArrow);
- SeenArrow = true;
+ Arrow = FormatTok;
nextToken();
break;
case tok::kw_requires: {
@@ -2375,6 +2371,9 @@ bool UnwrappedLineParser::tryToParseLambda() {
FormatTok->setFinalizedType(TT_LambdaLBrace);
LSquare.setFinalizedType(TT_LambdaLSquare);
+ if (Arrow)
+ Arrow->setFinalizedType(TT_LambdaArrow);
+
NestedLambdas.push_back(Line->SeenDecltypeAuto);
parseChildBlock();
assert(!NestedLambdas.empty());
@@ -2388,11 +2387,6 @@ bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
const FormatToken *LeftSquare = FormatTok;
nextToken();
if (Previous) {
- if (Previous->Tok.getIdentifierInfo() &&
- !Previous->isOneOf(tok::kw_return, tok::kw_co_await, tok::kw_co_yield,
- tok::kw_co_return)) {
- return false;
- }
if (Previous->closesScope()) {
// Not a potential C-style cast.
if (Previous->isNot(tok::r_paren))
@@ -2402,6 +2396,13 @@ bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
// and `int (*)()`.
if (!BeforeRParen || !BeforeRParen->isOneOf(tok::greater, tok::r_paren))
return false;
+ } else if (Previous->is(tok::star)) {
+ Previous = Previous->getPreviousNonComment();
+ }
+ if (Previous && Previous->Tok.getIdentifierInfo() &&
+ !Previous->isOneOf(tok::kw_return, tok::kw_co_await, tok::kw_co_yield,
+ tok::kw_co_return)) {
+ return false;
}
}
if (LeftSquare->isCppStructuredBinding(IsCpp))
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index edf0a09..877ab02 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -742,7 +742,10 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts,
Builder.defineMacro("__cpp_impl_coroutine", "201902L");
Builder.defineMacro("__cpp_designated_initializers", "201707L");
Builder.defineMacro("__cpp_impl_three_way_comparison", "201907L");
- //Builder.defineMacro("__cpp_modules", "201907L");
+    // Intentionally set __cpp_modules to 1.
+ // See https://github.com/llvm/llvm-project/issues/71364 for details.
+ // Builder.defineMacro("__cpp_modules", "201907L");
+ Builder.defineMacro("__cpp_modules", "1");
Builder.defineMacro("__cpp_using_enum", "201907L");
}
// C++23 features.
diff --git a/clang/lib/Frontend/ModuleDependencyCollector.cpp b/clang/lib/Frontend/ModuleDependencyCollector.cpp
index 3b363f9..ff37065 100644
--- a/clang/lib/Frontend/ModuleDependencyCollector.cpp
+++ b/clang/lib/Frontend/ModuleDependencyCollector.cpp
@@ -91,10 +91,10 @@ void ModuleDependencyCollector::attachToPreprocessor(Preprocessor &PP) {
std::make_unique<ModuleDependencyMMCallbacks>(*this));
}
-static bool isCaseSensitivePath(StringRef Path) {
+static bool isCaseSensitivePath(llvm::vfs::FileSystem &VFS, StringRef Path) {
SmallString<256> TmpDest = Path, UpperDest, RealDest;
// Remove component traversals, links, etc.
- if (llvm::sys::fs::real_path(Path, TmpDest))
+ if (VFS.getRealPath(Path, TmpDest))
return true; // Current default value in vfs.yaml
Path = TmpDest;
@@ -104,7 +104,7 @@ static bool isCaseSensitivePath(StringRef Path) {
// already expects when sensitivity isn't setup.
for (auto &C : Path)
UpperDest.push_back(toUppercase(C));
- if (!llvm::sys::fs::real_path(UpperDest, RealDest) && Path == RealDest)
+ if (!VFS.getRealPath(UpperDest, RealDest) && Path == RealDest)
return false;
return true;
}
@@ -121,7 +121,8 @@ void ModuleDependencyCollector::writeFileMap() {
// Explicitly set case sensitivity for the YAML writer. For that, find out
// the sensitivity at the path where the headers all collected to.
- VFSWriter.setCaseSensitivity(isCaseSensitivePath(VFSDir));
+ VFSWriter.setCaseSensitivity(
+ isCaseSensitivePath(Canonicalizer.getFileSystem(), VFSDir));
// Do not rely on real path names when executing the crash reproducer scripts
// since we only want to actually use the files we have on the VFS cache.
@@ -153,7 +154,7 @@ std::error_code ModuleDependencyCollector::copyToRoot(StringRef Src,
} else {
// When collecting entries from input vfsoverlays, copy the external
// contents into the cache but still map from the source.
- if (!fs::exists(Dst))
+ if (!Canonicalizer.getFileSystem().exists(Dst))
return std::error_code();
path::append(CacheDst, Dst);
Paths.CopyFrom = Dst;
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index a7f7099..d6ba19a 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -2311,10 +2311,9 @@ _mm256_cvttps_epi32(__m256 __a)
/// \param __a
/// A 256-bit vector of [4 x double].
/// \returns A 64 bit double containing the first element of the input vector.
-static __inline double __DEFAULT_FN_ATTRS
-_mm256_cvtsd_f64(__m256d __a)
-{
- return __a[0];
+static __inline double __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_cvtsd_f64(__m256d __a) {
+ return __a[0];
}
/// Returns the first element of the input vector of [8 x i32].
@@ -2327,11 +2326,10 @@ _mm256_cvtsd_f64(__m256d __a)
/// \param __a
/// A 256-bit vector of [8 x i32].
/// \returns A 32 bit integer containing the first element of the input vector.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_cvtsi256_si32(__m256i __a)
-{
- __v8si __b = (__v8si)__a;
- return __b[0];
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_cvtsi256_si32(__m256i __a) {
+ __v8si __b = (__v8si)__a;
+ return __b[0];
}
/// Returns the first element of the input vector of [8 x float].
@@ -2344,10 +2342,9 @@ _mm256_cvtsi256_si32(__m256i __a)
/// \param __a
/// A 256-bit vector of [8 x float].
/// \returns A 32 bit float containing the first element of the input vector.
-static __inline float __DEFAULT_FN_ATTRS
-_mm256_cvtss_f32(__m256 __a)
-{
- return __a[0];
+static __inline float __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_cvtss_f32(__m256 __a) {
+ return __a[0];
}
/* Vector replicate */
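With these intrinsics now marked constexpr-capable, a use like the following should fold at compile time (sketch; assumes -mavx and a C++ mode where constexpr vector initialization is supported):

    #include <immintrin.h>
    constexpr __m256d v = {1.0, 2.0, 3.0, 4.0};
    static_assert(_mm256_cvtsd_f64(v) == 1.0, "extracts the first element");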
diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp
index 8605ba2..a2c6957 100644
--- a/clang/lib/Parse/ParseExprCXX.cpp
+++ b/clang/lib/Parse/ParseExprCXX.cpp
@@ -1299,7 +1299,7 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer(
Diag(Tok, getLangOpts().CPlusPlus23
? diag::warn_cxx20_compat_decl_attrs_on_lambda
: diag::ext_decl_attrs_on_lambda)
- << Tok.getIdentifierInfo() << Tok.isRegularKeywordAttribute();
+ << Tok.isRegularKeywordAttribute() << Tok.getIdentifierInfo();
MaybeParseCXX11Attributes(D);
}
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index 02f3f10..04f29c8 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -2968,6 +2968,39 @@ OMPClause *Parser::ParseOpenMPSizesClause() {
OpenLoc, CloseLoc);
}
+OMPClause *Parser::ParseOpenMPLoopRangeClause() {
+ SourceLocation ClauseNameLoc = ConsumeToken();
+ SourceLocation FirstLoc, CountLoc;
+
+ BalancedDelimiterTracker T(*this, tok::l_paren, tok::annot_pragma_openmp_end);
+ if (T.consumeOpen()) {
+ Diag(Tok, diag::err_expected) << tok::l_paren;
+ return nullptr;
+ }
+
+ FirstLoc = Tok.getLocation();
+ ExprResult FirstVal = ParseConstantExpression();
+ if (!FirstVal.isUsable()) {
+ T.skipToEnd();
+ return nullptr;
+ }
+
+ ExpectAndConsume(tok::comma);
+
+ CountLoc = Tok.getLocation();
+ ExprResult CountVal = ParseConstantExpression();
+ if (!CountVal.isUsable()) {
+ T.skipToEnd();
+ return nullptr;
+ }
+
+ T.consumeClose();
+
+ return Actions.OpenMP().ActOnOpenMPLoopRangeClause(
+ FirstVal.get(), CountVal.get(), ClauseNameLoc, T.getOpenLocation(),
+ FirstLoc, CountLoc, T.getCloseLocation());
+}
+
OMPClause *Parser::ParseOpenMPPermutationClause() {
SourceLocation ClauseNameLoc, OpenLoc, CloseLoc;
SmallVector<Expr *> ArgExprs;
@@ -3473,6 +3506,9 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
}
Clause = ParseOpenMPClause(CKind, WrongDirective);
break;
+ case OMPC_looprange:
+ Clause = ParseOpenMPLoopRangeClause();
+ break;
default:
break;
}
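With the hook above, the parser accepts a looprange clause of the form looprange(first, count), where both arguments are parsed as constant expressions and handed to Sema. A hedged usage sketch (the fuse directive itself is added elsewhere in this patch; compiling it is assumed to require -fopenmp and an OpenMP version that provides the construct, e.g. -fopenmp-version=60):

  void saxpy_pair(int n, float a, float *x, float *y, float *z) {
  #pragma omp fuse looprange(1, 2)   // fuse the first two loops of the sequence
    {
      for (int i = 0; i < n; ++i) y[i] += a * x[i];
      for (int j = 0; j < n; ++j) z[j] += a * x[j];
      for (int k = 0; k < n; ++k) z[k] += y[k];   // outside the fused range
    }
  }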
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 1b66d83..8606227 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -983,10 +983,9 @@ static void DiagUninitUse(Sema &S, const VarDecl *VD, const UninitUse &Use,
case UninitUse::AfterDecl:
case UninitUse::AfterCall:
S.Diag(VD->getLocation(), diag::warn_sometimes_uninit_var)
- << VD->getDeclName() << IsCapturedByBlock
- << (Use.getKind() == UninitUse::AfterDecl ? 4 : 5)
- << const_cast<DeclContext*>(VD->getLexicalDeclContext())
- << VD->getSourceRange();
+ << VD->getDeclName() << IsCapturedByBlock
+ << (Use.getKind() == UninitUse::AfterDecl ? 4 : 5)
+ << VD->getLexicalDeclContext() << VD->getSourceRange();
S.Diag(Use.getUser()->getBeginLoc(), diag::note_uninit_var_use)
<< IsCapturedByBlock << Use.getUser()->getSourceRange();
return;
diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp
index d238b79..dc6d232 100644
--- a/clang/lib/Sema/SemaConcept.cpp
+++ b/clang/lib/Sema/SemaConcept.cpp
@@ -193,7 +193,7 @@ DiagRecursiveConstraintEval(Sema &S, llvm::FoldingSetNodeID &ID,
// Sema::InstantiatingTemplate::isAlreadyBeingInstantiated function.
if (S.SatisfactionStackContains(Templ, ID)) {
S.Diag(E->getExprLoc(), diag::err_constraint_depends_on_self)
- << const_cast<Expr *>(E) << E->getSourceRange();
+ << E << E->getSourceRange();
return true;
}
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 9ef7a26..0069b08 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -18909,8 +18909,7 @@ ExprResult Sema::VerifyBitField(SourceLocation FieldLoc,
// 'bool'.
if (BitfieldIsOverwide && !FieldTy->isBooleanType() && FieldName) {
Diag(FieldLoc, diag::warn_bitfield_width_exceeds_type_width)
- << FieldName << toString(Value, 10)
- << (unsigned)TypeWidth;
+ << FieldName << Value << (unsigned)TypeWidth;
}
}
diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp
index 552c929..a0483c3 100644
--- a/clang/lib/Sema/SemaExceptionSpec.cpp
+++ b/clang/lib/Sema/SemaExceptionSpec.cpp
@@ -1493,6 +1493,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) {
case Stmt::OMPUnrollDirectiveClass:
case Stmt::OMPReverseDirectiveClass:
case Stmt::OMPInterchangeDirectiveClass:
+ case Stmt::OMPFuseDirectiveClass:
case Stmt::OMPSingleDirectiveClass:
case Stmt::OMPTargetDataDirectiveClass:
case Stmt::OMPTargetDirectiveClass:
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 3b267c1..06b2529 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -16791,12 +16791,11 @@ ExprResult Sema::BuildVAArgExpr(SourceLocation BuiltinLoc,
Expr *OrigExpr = E;
bool IsMS = false;
- // CUDA device code does not support varargs.
+  // CUDA device-side __global__ functions do not support varargs.
if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
if (const FunctionDecl *F = dyn_cast<FunctionDecl>(CurContext)) {
CUDAFunctionTarget T = CUDA().IdentifyTarget(F);
- if (T == CUDAFunctionTarget::Global || T == CUDAFunctionTarget::Device ||
- T == CUDAFunctionTarget::HostDevice)
+ if (T == CUDAFunctionTarget::Global)
return ExprError(Diag(E->getBeginLoc(), diag::err_va_arg_in_device));
}
}
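The hunk above narrows the CUDA restriction: in device compilation, va_arg is now rejected only inside __global__ (kernel) functions, so __device__ and __host__ __device__ functions are no longer diagnosed by Sema. A hedged sketch of code this presumably accepts now (whether the target can actually lower varargs is a separate backend question):

  __device__ int sum_ints(int count, ...) {
    __builtin_va_list ap;
    __builtin_va_start(ap, count);
    int total = 0;
    for (int i = 0; i < count; ++i)
      total += __builtin_va_arg(ap, int);   // no longer err_va_arg_in_device here
    __builtin_va_end(ap);
    return total;
  }
  // A __global__ kernel using va_arg directly would still be diagnosed.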
@@ -20108,8 +20107,9 @@ static void DoMarkVarDeclReferenced(
bool NeededForConstantEvaluation =
isPotentiallyConstantEvaluatedContext(SemaRef) && UsableInConstantExpr;
- bool NeedDefinition =
- OdrUse == OdrUseContext::Used || NeededForConstantEvaluation;
+ bool NeedDefinition = OdrUse == OdrUseContext::Used ||
+ NeededForConstantEvaluation ||
+ Var->getType()->isUndeducedType();
assert(!isa<VarTemplatePartialSpecializationDecl>(Var) &&
"Can't instantiate a partial template specialization.");
diff --git a/clang/lib/Sema/SemaOpenACCAtomic.cpp b/clang/lib/Sema/SemaOpenACCAtomic.cpp
index a9319dc..ad21129 100644
--- a/clang/lib/Sema/SemaOpenACCAtomic.cpp
+++ b/clang/lib/Sema/SemaOpenACCAtomic.cpp
@@ -454,9 +454,7 @@ class AtomicOperandChecker {
// If nothing matches, error out.
DiagnoseInvalidAtomic(BinInf->FoundExpr->getExprLoc(),
SemaRef.PDiag(diag::note_acc_atomic_mismatch_operand)
- << const_cast<Expr *>(AssignInf.LHS)
- << const_cast<Expr *>(BinInf->LHS)
- << const_cast<Expr *>(BinInf->RHS));
+ << AssignInf.LHS << BinInf->LHS << BinInf->RHS);
return IDACInfo::Fail();
}
@@ -592,8 +590,7 @@ class AtomicOperandChecker {
PartialDiagnostic PD =
SemaRef.PDiag(diag::note_acc_atomic_mismatch_compound_operand)
- << FirstKind << const_cast<Expr *>(FirstX) << SecondKind
- << const_cast<Expr *>(SecondX);
+ << FirstKind << FirstX << SecondKind << SecondX;
return DiagnoseInvalidAtomic(SecondX->getExprLoc(), PD);
}
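The diagnostic-streaming hunks above (AnalysisBasedWarnings, SemaConcept, SemaDecl, SemaOpenACCAtomic) all drop const_cast on AST nodes, presumably because the DiagnosticBuilder operator<< overloads updated in this patch now accept pointers to const nodes. A schematic fragment, not a standalone file, showing the pattern callers can now use:

  // 'S' is a Sema &, 'E' a const Expr *; previously this required
  // 'const_cast<Expr *>(E)' on both streamed arguments.
  void noteSelfDependence(Sema &S, const Expr *E) {
    S.Diag(E->getExprLoc(), diag::err_constraint_depends_on_self)
        << E << E->getSourceRange();
  }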
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 48e06d1..0fa21e8 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -2490,7 +2490,8 @@ VarDecl *SemaOpenMP::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo,
DSAStackTy::DSAVarData DVarTop =
DSAStack->getTopDSA(D, DSAStack->isClauseParsingMode());
if (DVarTop.CKind != OMPC_unknown && isOpenMPPrivate(DVarTop.CKind) &&
- (!VD || VD->hasLocalStorage() || !DVarTop.AppliedToPointee))
+ (!VD || VD->hasLocalStorage() ||
+ !(DVarTop.AppliedToPointee && DVarTop.CKind != OMPC_reduction)))
return VD ? VD : cast<VarDecl>(DVarTop.PrivateCopy->getDecl());
// Threadprivate variables must not be captured.
if (isOpenMPThreadPrivate(DVarTop.CKind))
@@ -4569,6 +4570,7 @@ void SemaOpenMP::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind,
case OMPD_unroll:
case OMPD_reverse:
case OMPD_interchange:
+ case OMPD_fuse:
case OMPD_assume:
break;
default:
@@ -6410,6 +6412,10 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective(
Res = ActOnOpenMPInterchangeDirective(ClausesWithImplicit, AStmt, StartLoc,
EndLoc);
break;
+ case OMPD_fuse:
+ Res =
+ ActOnOpenMPFuseDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc);
+ break;
case OMPD_for:
Res = ActOnOpenMPForDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc,
VarsWithInheritedDSA);
@@ -9488,7 +9494,9 @@ static bool checkOpenMPIterationSpace(
// sharing attributes.
VarsWithImplicitDSA.erase(LCDecl);
- assert(isOpenMPLoopDirective(DKind) && "DSA for non-loop vars");
+ assert((isOpenMPLoopDirective(DKind) ||
+ isOpenMPCanonicalLoopSequenceTransformationDirective(DKind)) &&
+ "DSA for non-loop vars");
// Check test-expr.
HasErrors |= ISC.checkAndSetCond(For ? For->getCond() : CXXFor->getCond());
@@ -9916,7 +9924,8 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
unsigned NumLoops = std::max(OrderedLoopCount, NestedLoopCount);
SmallVector<LoopIterationSpace, 4> IterSpaces(NumLoops);
if (!OMPLoopBasedDirective::doForAllLoops(
- AStmt->IgnoreContainers(!isOpenMPLoopTransformationDirective(DKind)),
+ AStmt->IgnoreContainers(
+ !isOpenMPCanonicalLoopNestTransformationDirective(DKind)),
SupportsNonPerfectlyNested, NumLoops,
[DKind, &SemaRef, &DSA, NumLoops, NestedLoopCount,
CollapseLoopCountExpr, OrderedLoopCountExpr, &VarsWithImplicitDSA,
@@ -9938,8 +9947,7 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
}
return false;
},
- [&SemaRef,
- &Captures](OMPCanonicalLoopNestTransformationDirective *Transform) {
+ [&SemaRef, &Captures](OMPLoopTransformationDirective *Transform) {
Stmt *DependentPreInits = Transform->getPreInits();
if (!DependentPreInits)
return;
@@ -9954,7 +9962,8 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
auto *D = cast<VarDecl>(C);
DeclRefExpr *Ref = buildDeclRefExpr(
SemaRef, D, D->getType().getNonReferenceType(),
- Transform->getBeginLoc());
+ cast<OMPExecutableDirective>(Transform->getDirective())
+ ->getBeginLoc());
Captures[Ref] = Ref;
}
}
@@ -14404,10 +14413,34 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeSimdDirective(
getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
}
+/// Updates PreInits by checking Transform against the known loop transformation
+/// directives and appending their pre-inits if a match is found.
+static void updatePreInits(OMPLoopTransformationDirective *Transform,
+ SmallVectorImpl<Stmt *> &PreInits) {
+ Stmt *Dir = Transform->getDirective();
+ switch (Dir->getStmtClass()) {
+#define STMT(CLASS, PARENT)
+#define ABSTRACT_STMT(CLASS)
+#define COMMON_OMP_LOOP_TRANSFORMATION(CLASS, PARENT) \
+ case Stmt::CLASS##Class: \
+ appendFlattenedStmtList(PreInits, \
+ static_cast<const CLASS *>(Dir)->getPreInits()); \
+ break;
+#define OMPCANONICALLOOPNESTTRANSFORMATIONDIRECTIVE(CLASS, PARENT) \
+ COMMON_OMP_LOOP_TRANSFORMATION(CLASS, PARENT)
+#define OMPCANONICALLOOPSEQUENCETRANSFORMATIONDIRECTIVE(CLASS, PARENT) \
+ COMMON_OMP_LOOP_TRANSFORMATION(CLASS, PARENT)
+#include "clang/AST/StmtNodes.inc"
+#undef COMMON_OMP_LOOP_TRANSFORMATION
+ default:
+ llvm_unreachable("Not a loop transformation");
+ }
+}
+
bool SemaOpenMP::checkTransformableLoopNest(
OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops,
SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
- Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits) {
+ Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *>> &OriginalInits) {
OriginalInits.emplace_back();
bool Result = OMPLoopBasedDirective::doForAllLoops(
AStmt->IgnoreContainers(), /*TryImperfectlyNestedLoops=*/false, NumLoops,
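The switch in updatePreInits is driven by the StmtNodes.inc X-macros: every class declared as a canonical loop-nest or canonical loop-sequence transformation directive gets a case that downcasts Dir and forwards its pre-inits. For one class of each kind, the expansion is roughly the following (illustrative; the actual class list comes from StmtNodes.td):

    case Stmt::OMPTileDirectiveClass:
      appendFlattenedStmtList(
          PreInits, static_cast<const OMPTileDirective *>(Dir)->getPreInits());
      break;
    case Stmt::OMPFuseDirectiveClass:
      appendFlattenedStmtList(
          PreInits, static_cast<const OMPFuseDirective *>(Dir)->getPreInits());
      break;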
@@ -14433,29 +14466,268 @@ bool SemaOpenMP::checkTransformableLoopNest(
OriginalInits.emplace_back();
return false;
},
- [&OriginalInits](OMPLoopBasedDirective *Transform) {
- Stmt *DependentPreInits;
- if (auto *Dir = dyn_cast<OMPTileDirective>(Transform))
- DependentPreInits = Dir->getPreInits();
- else if (auto *Dir = dyn_cast<OMPStripeDirective>(Transform))
- DependentPreInits = Dir->getPreInits();
- else if (auto *Dir = dyn_cast<OMPUnrollDirective>(Transform))
- DependentPreInits = Dir->getPreInits();
- else if (auto *Dir = dyn_cast<OMPReverseDirective>(Transform))
- DependentPreInits = Dir->getPreInits();
- else if (auto *Dir = dyn_cast<OMPInterchangeDirective>(Transform))
- DependentPreInits = Dir->getPreInits();
- else
- llvm_unreachable("Unhandled loop transformation");
-
- appendFlattenedStmtList(OriginalInits.back(), DependentPreInits);
+ [&OriginalInits](OMPLoopTransformationDirective *Transform) {
+ updatePreInits(Transform, OriginalInits.back());
});
assert(OriginalInits.back().empty() && "No preinit after innermost loop");
OriginalInits.pop_back();
return Result;
}
-/// Add preinit statements that need to be propageted from the selected loop.
+/// Counts the total number of OpenMP canonical nested loops, including the
+/// outermost loop (the original loop). The precondition of this visitor is
+/// that it is invoked on the original loop to be analyzed. The traversal
+/// stops at Decls and Exprs because they may contain inner loops that must
+/// not be counted.
+///
+/// Example AST structure for the code:
+///
+/// int main() {
+/// #pragma omp fuse
+/// {
+/// for (int i = 0; i < 100; i++) { <-- Outer loop
+/// []() {
+/// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP (1)
+/// };
+/// for(int j = 0; j < 5; ++j) {} <-- Inner loop
+/// }
+///     for (int r = 0; r < 100; r++) {    <-- Outer loop
+/// struct LocalClass {
+/// void bar() {
+/// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP (2)
+/// }
+/// };
+/// for(int k = 0; k < 10; ++k) {} <-- Inner loop
+///       ({x = 5; for(k = 0; k < 10; ++k) x += k; x}); <-- NOT A LOOP (3)
+/// }
+/// }
+/// }
+/// (1) because in a different function (here: a lambda)
+/// (2) because in a different function (here: class method)
+/// (3) because it is intervening code of a non-perfectly nested loop
+/// Result: Loop 'i' contains 2 loops, Loop 'r' also contains 2 loops.
+class NestedLoopCounterVisitor final : public DynamicRecursiveASTVisitor {
+private:
+ unsigned NestedLoopCount = 0;
+
+public:
+ explicit NestedLoopCounterVisitor() = default;
+
+ unsigned getNestedLoopCount() const { return NestedLoopCount; }
+
+ bool VisitForStmt(ForStmt *FS) override {
+ ++NestedLoopCount;
+ return true;
+ }
+
+ bool VisitCXXForRangeStmt(CXXForRangeStmt *FRS) override {
+ ++NestedLoopCount;
+ return true;
+ }
+
+ bool TraverseStmt(Stmt *S) override {
+ if (!S)
+ return true;
+
+ // Skip traversal of all expressions, including special cases like
+ // LambdaExpr, StmtExpr, BlockExpr, and RequiresExpr. These expressions
+ // may contain inner statements (and even loops), but they are not part
+ // of the syntactic body of the surrounding loop structure.
+    // Therefore they must not be counted.
+ if (isa<Expr>(S))
+ return true;
+
+ // Only recurse into CompoundStmt (block {}) and loop bodies.
+ if (isa<CompoundStmt, ForStmt, CXXForRangeStmt>(S)) {
+ return DynamicRecursiveASTVisitor::TraverseStmt(S);
+ }
+
+    // Stop traversal for any other statement, since it breaks perfect
+    // loop nesting, e.g. control flow (IfStmt, SwitchStmt...).
+ return true;
+ }
+
+ bool TraverseDecl(Decl *D) override {
+    // Stop when a declaration is found; declarations (CXXRecordDecl,
+    // RecordDecl, FunctionDecl...) are not relevant for finding nested
+    // loops.
+ return true;
+ }
+};
+
+bool SemaOpenMP::analyzeLoopSequence(Stmt *LoopSeqStmt,
+ LoopSequenceAnalysis &SeqAnalysis,
+ ASTContext &Context,
+ OpenMPDirectiveKind Kind) {
+ VarsWithInheritedDSAType TmpDSA;
+ // Helper Lambda to handle storing initialization and body statements for
+ // both ForStmt and CXXForRangeStmt.
+ auto StoreLoopStatements = [](LoopAnalysis &Analysis, Stmt *LoopStmt) {
+ if (auto *For = dyn_cast<ForStmt>(LoopStmt)) {
+ Analysis.OriginalInits.push_back(For->getInit());
+ Analysis.TheForStmt = For;
+ } else {
+ auto *CXXFor = cast<CXXForRangeStmt>(LoopStmt);
+ Analysis.OriginalInits.push_back(CXXFor->getBeginStmt());
+ Analysis.TheForStmt = CXXFor;
+ }
+ };
+
+  // Helper lambdas that encapsulate the processing of the different
+  // derivations of the canonical loop sequence grammar: loop-generating
+  // transformations and regular canonical loops.
+ auto AnalyzeLoopGeneration = [&](Stmt *Child) {
+ auto *LoopTransform = cast<OMPLoopTransformationDirective>(Child);
+ Stmt *TransformedStmt = LoopTransform->getTransformedStmt();
+ unsigned NumGeneratedTopLevelLoops =
+ LoopTransform->getNumGeneratedTopLevelLoops();
+    // Handle the case where the transformed statement is not available
+    // because the context is dependent.
+ if (!TransformedStmt) {
+ if (NumGeneratedTopLevelLoops > 0) {
+ SeqAnalysis.LoopSeqSize += NumGeneratedTopLevelLoops;
+ return true;
+ }
+ // Unroll full (0 loops produced)
+ Diag(Child->getBeginLoc(), diag::err_omp_not_for)
+ << 0 << getOpenMPDirectiveName(Kind);
+ return false;
+ }
+    // Handle loop transformations according to how many top-level loops
+    // they generate. Unroll full generates none:
+    if (!NumGeneratedTopLevelLoops) {
+ Diag(Child->getBeginLoc(), diag::err_omp_not_for)
+ << 0 << getOpenMPDirectiveName(Kind);
+ return false;
+ }
+    // Loop transformations such as split or loop-ranged fuse generate more
+    // than one top-level loop.
+    if (NumGeneratedTopLevelLoops > 1) {
+      // Get the preinits related to this loop-sequence-generating
+      // loop transformation (i.e. loop-ranged fuse, split...).
+      // These preinits differ slightly from the regular inits/pre-inits of
+      // single-loop-generating transformations (interchange, unroll),
+      // given that they are not bound to a particular loop nest,
+      // so they need to be treated independently.
+ updatePreInits(LoopTransform, SeqAnalysis.LoopSequencePreInits);
+ return analyzeLoopSequence(TransformedStmt, SeqAnalysis, Context, Kind);
+ }
+    // Vast majority of cases: a single top-level loop is generated
+    // (tile, unroll, stripe, reverse, interchange, full fuse).
+ // Process the transformed loop statement
+ LoopAnalysis &NewTransformedSingleLoop =
+ SeqAnalysis.Loops.emplace_back(Child);
+ unsigned IsCanonical = checkOpenMPLoop(
+ Kind, nullptr, nullptr, TransformedStmt, SemaRef, *DSAStack, TmpDSA,
+ NewTransformedSingleLoop.HelperExprs);
+
+ if (!IsCanonical)
+ return false;
+
+ StoreLoopStatements(NewTransformedSingleLoop, TransformedStmt);
+ updatePreInits(LoopTransform, NewTransformedSingleLoop.TransformsPreInits);
+
+ SeqAnalysis.LoopSeqSize++;
+ return true;
+ };
+
+ // Modularized code for handling regular canonical loops.
+ auto AnalyzeRegularLoop = [&](Stmt *Child) {
+ LoopAnalysis &NewRegularLoop = SeqAnalysis.Loops.emplace_back(Child);
+ unsigned IsCanonical =
+ checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack,
+ TmpDSA, NewRegularLoop.HelperExprs);
+
+ if (!IsCanonical)
+ return false;
+
+ StoreLoopStatements(NewRegularLoop, Child);
+ NestedLoopCounterVisitor NLCV;
+ NLCV.TraverseStmt(Child);
+ return true;
+ };
+
+ // High level grammar validation.
+ for (Stmt *Child : LoopSeqStmt->children()) {
+ if (!Child)
+ continue;
+ // Skip over non-loop-sequence statements.
+ if (!LoopSequenceAnalysis::isLoopSequenceDerivation(Child)) {
+ Child = Child->IgnoreContainers();
+ // Ignore empty compound statement.
+ if (!Child)
+ continue;
+      // In the case of a nested loop sequence, ignoring containers is not
+      // enough; a recursive traversal of the loop sequence is required.
+ if (isa<CompoundStmt>(Child)) {
+ if (!analyzeLoopSequence(Child, SeqAnalysis, Context, Kind))
+ return false;
+        // Already handled; skip this child.
+ continue;
+ }
+ }
+ // Regular loop sequence handling.
+ if (LoopSequenceAnalysis::isLoopSequenceDerivation(Child)) {
+ if (LoopAnalysis::isLoopTransformation(Child)) {
+ if (!AnalyzeLoopGeneration(Child))
+ return false;
+ // AnalyzeLoopGeneration updates SeqAnalysis.LoopSeqSize accordingly.
+ } else {
+ if (!AnalyzeRegularLoop(Child))
+ return false;
+ SeqAnalysis.LoopSeqSize++;
+ }
+ } else {
+ // Report error for invalid statement inside canonical loop sequence.
+ Diag(Child->getBeginLoc(), diag::err_omp_not_for)
+ << 0 << getOpenMPDirectiveName(Kind);
+ return false;
+ }
+ }
+ return true;
+}
+
+bool SemaOpenMP::checkTransformableLoopSequence(
+ OpenMPDirectiveKind Kind, Stmt *AStmt, LoopSequenceAnalysis &SeqAnalysis,
+ ASTContext &Context) {
+  // Following the OpenMP 6.0 API Specification, a canonical loop sequence follows
+ // the grammar:
+ //
+ // canonical-loop-sequence:
+ // {
+ // loop-sequence+
+ // }
+ // where loop-sequence can be any of the following:
+ // 1. canonical-loop-sequence
+ // 2. loop-nest
+  //   3. loop-sequence-generating-construct (i.e. OMPLoopTransformationDirective)
+  //
+  // To recognize and traverse this structure, the helper function
+  // analyzeLoopSequence serves as the recursive entry point
+  // and tries to match the input AST to the canonical loop sequence grammar
+  // structure. This function performs both a semantic and a syntactic
+  // analysis of the given statement according to the OpenMP 6.0 definition of
+  // the aforementioned canonical loop sequence.
+
+ // We expect an outer compound statement.
+ if (!isa<CompoundStmt>(AStmt)) {
+ Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence)
+ << getOpenMPDirectiveName(Kind);
+ return false;
+ }
+
+ // Recursive entry point to process the main loop sequence
+ if (!analyzeLoopSequence(AStmt, SeqAnalysis, Context, Kind))
+ return false;
+
+ // Diagnose an empty loop sequence.
+ if (!SeqAnalysis.LoopSeqSize) {
+ Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence)
+ << getOpenMPDirectiveName(Kind);
+ return false;
+ }
+ return true;
+}
+
+/// Add preinit statements that need to be propagated from the selected loop.
static void addLoopPreInits(ASTContext &Context,
OMPLoopBasedDirective::HelperExprs &LoopHelper,
Stmt *LoopStmt, ArrayRef<Stmt *> OriginalInit,
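In source terms, checkTransformableLoopSequence accepts a compound statement whose top-level children are canonical loops, nested compound statements of such loops, or loop-generating transformation directives; anything else is diagnosed. A hedged sketch (directive spellings per this patch; the loop bodies are arbitrary):

  void ok(int n, double *a) {
  #pragma omp fuse
    {                                    // canonical loop sequence, LoopSeqSize == 3
      for (int i = 0; i < n; ++i) a[i] += 1.0;
      {                                  // nested compound statement: recursed into
        for (int j = 0; j < n; ++j) a[j] *= 2.0;
      }
  #pragma omp unroll partial(4)          // loop-generating transformation: one loop
      for (int k = 0; k < n; ++k) a[k] -= 3.0;
    }
  }

  void bad(int n, double *a) {
  #pragma omp fuse
    {
      for (int i = 0; i < n; ++i) a[i] += 1.0;
      a[0] = 0.0;                        // error: not a canonical loop (err_omp_not_for)
    }
  }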
@@ -14540,7 +14812,7 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses,
// Verify and diagnose loop nest.
SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers(NumLoops);
Stmt *Body = nullptr;
- SmallVector<SmallVector<Stmt *, 0>, 4> OriginalInits;
+ SmallVector<SmallVector<Stmt *>, 4> OriginalInits;
if (!checkTransformableLoopNest(OMPD_tile, AStmt, NumLoops, LoopHelpers, Body,
OriginalInits))
return StmtError();
@@ -14817,7 +15089,7 @@ StmtResult SemaOpenMP::ActOnOpenMPStripeDirective(ArrayRef<OMPClause *> Clauses,
// Verify and diagnose loop nest.
SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers(NumLoops);
Stmt *Body = nullptr;
- SmallVector<SmallVector<Stmt *, 0>, 4> OriginalInits;
+ SmallVector<SmallVector<Stmt *>, 4> OriginalInits;
if (!checkTransformableLoopNest(OMPD_stripe, AStmt, NumLoops, LoopHelpers,
Body, OriginalInits))
return StmtError();
@@ -15078,7 +15350,7 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef<OMPClause *> Clauses,
Stmt *Body = nullptr;
SmallVector<OMPLoopBasedDirective::HelperExprs, NumLoops> LoopHelpers(
NumLoops);
- SmallVector<SmallVector<Stmt *, 0>, NumLoops + 1> OriginalInits;
+ SmallVector<SmallVector<Stmt *>, NumLoops + 1> OriginalInits;
if (!checkTransformableLoopNest(OMPD_unroll, AStmt, NumLoops, LoopHelpers,
Body, OriginalInits))
return StmtError();
@@ -15348,7 +15620,7 @@ StmtResult SemaOpenMP::ActOnOpenMPReverseDirective(Stmt *AStmt,
Stmt *Body = nullptr;
SmallVector<OMPLoopBasedDirective::HelperExprs, NumLoops> LoopHelpers(
NumLoops);
- SmallVector<SmallVector<Stmt *, 0>, NumLoops + 1> OriginalInits;
+ SmallVector<SmallVector<Stmt *>, NumLoops + 1> OriginalInits;
if (!checkTransformableLoopNest(OMPD_reverse, AStmt, NumLoops, LoopHelpers,
Body, OriginalInits))
return StmtError();
@@ -15540,7 +15812,7 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective(
// Verify and diagnose loop nest.
SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers(NumLoops);
Stmt *Body = nullptr;
- SmallVector<SmallVector<Stmt *, 0>, 2> OriginalInits;
+ SmallVector<SmallVector<Stmt *>, 2> OriginalInits;
if (!checkTransformableLoopNest(OMPD_interchange, AStmt, NumLoops,
LoopHelpers, Body, OriginalInits))
return StmtError();
@@ -15716,6 +15988,484 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective(
buildPreInits(Context, PreInits));
}
+StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
+ Stmt *AStmt,
+ SourceLocation StartLoc,
+ SourceLocation EndLoc) {
+
+ ASTContext &Context = getASTContext();
+ DeclContext *CurrContext = SemaRef.CurContext;
+ Scope *CurScope = SemaRef.getCurScope();
+ CaptureVars CopyTransformer(SemaRef);
+
+ // Ensure the structured block is not empty
+ if (!AStmt)
+ return StmtError();
+
+  // Defer the transformation in dependent contexts.
+  // The number-of-generated-loops argument is set to a placeholder 1 (even
+  // though a looprange fuse could yield several top-level loop nests),
+  // because a dependent context could prevent determining its true value.
+ if (CurrContext->isDependentContext())
+ return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses,
+ /* NumLoops */ 1, AStmt, nullptr, nullptr);
+
+ // Validate that the potential loop sequence is transformable for fusion
+ // Also collect the HelperExprs, Loop Stmts, Inits, and Number of loops
+ LoopSequenceAnalysis SeqAnalysis;
+ if (!checkTransformableLoopSequence(OMPD_fuse, AStmt, SeqAnalysis, Context))
+ return StmtError();
+
+ // SeqAnalysis.LoopSeqSize exists mostly to handle dependent contexts,
+ // otherwise it must be the same as SeqAnalysis.Loops.size().
+ assert(SeqAnalysis.LoopSeqSize == SeqAnalysis.Loops.size() &&
+ "Inconsistent size of the loop sequence and the number of loops "
+ "found in the sequence");
+
+ // Handle clauses, which can be any of the following: [looprange, apply]
+ const auto *LRC =
+ OMPExecutableDirective::getSingleClause<OMPLoopRangeClause>(Clauses);
+
+ // The clause arguments are invalidated if any error arises
+ // such as non-constant or non-positive arguments
+ if (LRC && (!LRC->getFirst() || !LRC->getCount()))
+ return StmtError();
+
+ // Delayed semantic check of LoopRange constraint
+ // Evaluates the loop range arguments and returns the first and count values
+ auto EvaluateLoopRangeArguments = [&Context](Expr *First, Expr *Count,
+ uint64_t &FirstVal,
+ uint64_t &CountVal) {
+ llvm::APSInt FirstInt = First->EvaluateKnownConstInt(Context);
+ llvm::APSInt CountInt = Count->EvaluateKnownConstInt(Context);
+ FirstVal = FirstInt.getZExtValue();
+ CountVal = CountInt.getZExtValue();
+ };
+
+ // OpenMP [6.0, Restrictions]
+ // first + count - 1 must not evaluate to a value greater than the
+ // loop sequence length of the associated canonical loop sequence.
+ auto ValidLoopRange = [](uint64_t FirstVal, uint64_t CountVal,
+ unsigned NumLoops) -> bool {
+ return FirstVal + CountVal - 1 <= NumLoops;
+ };
+ uint64_t FirstVal = 1, CountVal = 0, LastVal = SeqAnalysis.LoopSeqSize;
+
+ // Validates the loop range after evaluating the semantic information
+ // and ensures that the range is valid for the given loop sequence size.
+ // Expressions are evaluated at compile time to obtain constant values.
+ if (LRC) {
+ EvaluateLoopRangeArguments(LRC->getFirst(), LRC->getCount(), FirstVal,
+ CountVal);
+ if (CountVal == 1)
+ SemaRef.Diag(LRC->getCountLoc(), diag::warn_omp_redundant_fusion)
+ << getOpenMPDirectiveName(OMPD_fuse);
+
+ if (!ValidLoopRange(FirstVal, CountVal, SeqAnalysis.LoopSeqSize)) {
+ SemaRef.Diag(LRC->getFirstLoc(), diag::err_omp_invalid_looprange)
+ << getOpenMPDirectiveName(OMPD_fuse) << FirstVal
+ << (FirstVal + CountVal - 1) << SeqAnalysis.LoopSeqSize;
+ return StmtError();
+ }
+
+ LastVal = FirstVal + CountVal - 1;
+ }
+
+  // Complete fusion generates a single canonical loop nest.
+  // However, the looprange clause may generate several loop nests.
+ unsigned NumGeneratedTopLevelLoops =
+ LRC ? SeqAnalysis.LoopSeqSize - CountVal + 1 : 1;
+
+ // Emit a warning for redundant loop fusion when the sequence contains only
+ // one loop.
+ if (SeqAnalysis.LoopSeqSize == 1)
+ SemaRef.Diag(AStmt->getBeginLoc(), diag::warn_omp_redundant_fusion)
+ << getOpenMPDirectiveName(OMPD_fuse);
+
+ // Select the type with the largest bit width among all induction variables
+ QualType IVType =
+ SeqAnalysis.Loops[FirstVal - 1].HelperExprs.IterationVarRef->getType();
+ for (unsigned I : llvm::seq<unsigned>(FirstVal, LastVal)) {
+ QualType CurrentIVType =
+ SeqAnalysis.Loops[I].HelperExprs.IterationVarRef->getType();
+ if (Context.getTypeSize(CurrentIVType) > Context.getTypeSize(IVType)) {
+ IVType = CurrentIVType;
+ }
+ }
+ uint64_t IVBitWidth = Context.getIntWidth(IVType);
+
+  // Create pre-init declarations for the lower bounds, upper bounds,
+  // strides and num-iterations of every top-level loop in the fusion.
+ SmallVector<VarDecl *, 4> LBVarDecls;
+ SmallVector<VarDecl *, 4> STVarDecls;
+ SmallVector<VarDecl *, 4> NIVarDecls;
+ SmallVector<VarDecl *, 4> UBVarDecls;
+ SmallVector<VarDecl *, 4> IVVarDecls;
+
+ // Helper lambda to create variables for bounds, strides, and other
+ // expressions. Generates both the variable declaration and the corresponding
+ // initialization statement.
+ auto CreateHelperVarAndStmt =
+ [&, &SemaRef = SemaRef](Expr *ExprToCopy, const std::string &BaseName,
+ unsigned I, bool NeedsNewVD = false) {
+ Expr *TransformedExpr =
+ AssertSuccess(CopyTransformer.TransformExpr(ExprToCopy));
+ if (!TransformedExpr)
+ return std::pair<VarDecl *, StmtResult>(nullptr, StmtError());
+
+ auto Name = (Twine(".omp.") + BaseName + std::to_string(I)).str();
+
+ VarDecl *VD;
+ if (NeedsNewVD) {
+ VD = buildVarDecl(SemaRef, SourceLocation(), IVType, Name);
+ SemaRef.AddInitializerToDecl(VD, TransformedExpr, false);
+ } else {
+ // Create a unique variable name
+ DeclRefExpr *DRE = cast<DeclRefExpr>(TransformedExpr);
+ VD = cast<VarDecl>(DRE->getDecl());
+ VD->setDeclName(&SemaRef.PP.getIdentifierTable().get(Name));
+ }
+ // Create the corresponding declaration statement
+ StmtResult DeclStmt = new (Context) class DeclStmt(
+ DeclGroupRef(VD), SourceLocation(), SourceLocation());
+ return std::make_pair(VD, DeclStmt);
+ };
+
+  // PreInits holds a sequence of variable declarations that must be executed
+  // before the fused loop begins. These include bounds, strides, and other
+  // helper variables required for the transformation. Other loop transforms
+  // also contain their own preinits.
+ SmallVector<Stmt *> PreInits;
+
+ // Update the general preinits using the preinits generated by loop sequence
+ // generating loop transformations. These preinits differ slightly from
+ // single-loop transformation preinits, as they can be detached from a
+ // specific loop inside multiple generated loop nests. This happens
+ // because certain helper variables, like '.omp.fuse.max', are introduced to
+ // handle fused iteration spaces and may not be directly tied to a single
+ // original loop. The preinit structure must ensure that hidden variables
+ // like '.omp.fuse.max' are still properly handled.
+  // Transformations that apply this concept: loop-ranged fuse, split.
+ llvm::append_range(PreInits, SeqAnalysis.LoopSequencePreInits);
+
+  // Process each single loop to generate and collect declarations
+  // and statements for all helper expressions related to
+  // particular single loop nests.
+
+  // Also, in the case of the fused loops, we keep track of their original
+  // inits by appending them to their preinits statement, and in the case of
+  // transformations, we also append their preinits (which contain the
+  // original loop initialization statement or other statements).
+
+  // First, set TransformIndex to match the beginning of the
+  // looprange section.
+ unsigned int TransformIndex = 0;
+ for (unsigned I : llvm::seq<unsigned>(FirstVal - 1)) {
+ if (SeqAnalysis.Loops[I].isLoopTransformation())
+ ++TransformIndex;
+ }
+
+ for (unsigned int I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) {
+ if (SeqAnalysis.Loops[I].isRegularLoop()) {
+ addLoopPreInits(Context, SeqAnalysis.Loops[I].HelperExprs,
+ SeqAnalysis.Loops[I].TheForStmt,
+ SeqAnalysis.Loops[I].OriginalInits, PreInits);
+ } else if (SeqAnalysis.Loops[I].isLoopTransformation()) {
+ // For transformed loops, insert both pre-inits and original inits.
+ // Order matters: pre-inits may define variables used in the original
+ // inits such as upper bounds...
+ SmallVector<Stmt *> &TransformPreInit =
+ SeqAnalysis.Loops[TransformIndex++].TransformsPreInits;
+ llvm::append_range(PreInits, TransformPreInit);
+
+ addLoopPreInits(Context, SeqAnalysis.Loops[I].HelperExprs,
+ SeqAnalysis.Loops[I].TheForStmt,
+ SeqAnalysis.Loops[I].OriginalInits, PreInits);
+ }
+ auto [UBVD, UBDStmt] =
+ CreateHelperVarAndStmt(SeqAnalysis.Loops[I].HelperExprs.UB, "ub", J);
+ auto [LBVD, LBDStmt] =
+ CreateHelperVarAndStmt(SeqAnalysis.Loops[I].HelperExprs.LB, "lb", J);
+ auto [STVD, STDStmt] =
+ CreateHelperVarAndStmt(SeqAnalysis.Loops[I].HelperExprs.ST, "st", J);
+ auto [NIVD, NIDStmt] = CreateHelperVarAndStmt(
+ SeqAnalysis.Loops[I].HelperExprs.NumIterations, "ni", J, true);
+ auto [IVVD, IVDStmt] = CreateHelperVarAndStmt(
+ SeqAnalysis.Loops[I].HelperExprs.IterationVarRef, "iv", J);
+
+ assert(LBVD && STVD && NIVD && IVVD &&
+ "OpenMP Fuse Helper variables creation failed");
+
+ UBVarDecls.push_back(UBVD);
+ LBVarDecls.push_back(LBVD);
+ STVarDecls.push_back(STVD);
+ NIVarDecls.push_back(NIVD);
+ IVVarDecls.push_back(IVVD);
+
+ PreInits.push_back(LBDStmt.get());
+ PreInits.push_back(STDStmt.get());
+ PreInits.push_back(NIDStmt.get());
+ PreInits.push_back(IVDStmt.get());
+ }
+
+ auto MakeVarDeclRef = [&SemaRef = this->SemaRef](VarDecl *VD) {
+ return buildDeclRefExpr(SemaRef, VD, VD->getType(), VD->getLocation(),
+ false);
+ };
+
+  // Next, the final fused loop is created. It has the following shape
+  // (considering the selected loops):
+ //
+ // for (fuse.index = 0; fuse.index < max(ni0, ni1..., nik); ++fuse.index) {
+ // if (fuse.index < ni0){
+ // iv0 = lb0 + st0 * fuse.index;
+ // original.index0 = iv0
+ // body(0);
+ // }
+ // if (fuse.index < ni1){
+ // iv1 = lb1 + st1 * fuse.index;
+ // original.index1 = iv1
+ // body(1);
+ // }
+ //
+ // ...
+ //
+ // if (fuse.index < nik){
+ // ivk = lbk + stk * fuse.index;
+ // original.indexk = ivk
+  //      body(k);
+  //    }
+  //  }
+
+ // 1. Create the initialized fuse index
+ StringRef IndexName = ".omp.fuse.index";
+ Expr *InitVal = IntegerLiteral::Create(Context, llvm::APInt(IVBitWidth, 0),
+ IVType, SourceLocation());
+ VarDecl *IndexDecl =
+ buildVarDecl(SemaRef, {}, IVType, IndexName, nullptr, nullptr);
+ SemaRef.AddInitializerToDecl(IndexDecl, InitVal, false);
+ StmtResult InitStmt = new (Context)
+ DeclStmt(DeclGroupRef(IndexDecl), SourceLocation(), SourceLocation());
+
+ if (!InitStmt.isUsable())
+ return StmtError();
+
+ auto MakeIVRef = [&SemaRef = this->SemaRef, IndexDecl, IVType,
+ Loc = InitVal->getExprLoc()]() {
+ return buildDeclRefExpr(SemaRef, IndexDecl, IVType, Loc, false);
+ };
+
+ // 2. Iteratively compute the max number of logical iterations Max(NI_1, NI_2,
+ // ..., NI_k)
+ //
+ // This loop accumulates the maximum value across multiple expressions,
+ // ensuring each step constructs a unique AST node for correctness. By using
+ // intermediate temporary variables and conditional operators, we maintain
+  // distinct nodes and avoid duplicating subtrees. For instance, max(a,b,c):
+  // omp.temp0 = max(a, b)
+  // omp.temp1 = max(omp.temp0, c)
+  // omp.fuse.max = omp.temp1
+
+ ExprResult MaxExpr;
+  // I iterates over the range of loops in the sequence that we fuse.
+ for (unsigned I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) {
+ DeclRefExpr *NIRef = MakeVarDeclRef(NIVarDecls[J]);
+ QualType NITy = NIRef->getType();
+
+ if (MaxExpr.isUnset()) {
+ // Initialize MaxExpr with the first NI expression
+ MaxExpr = NIRef;
+ } else {
+      // Create a new accumulator variable t_i = MaxExpr
+ std::string TempName = (Twine(".omp.temp.") + Twine(J)).str();
+ VarDecl *TempDecl =
+ buildVarDecl(SemaRef, {}, NITy, TempName, nullptr, nullptr);
+ TempDecl->setInit(MaxExpr.get());
+ DeclRefExpr *TempRef =
+ buildDeclRefExpr(SemaRef, TempDecl, NITy, SourceLocation(), false);
+ DeclRefExpr *TempRef2 =
+ buildDeclRefExpr(SemaRef, TempDecl, NITy, SourceLocation(), false);
+ // Add a DeclStmt to PreInits to ensure the variable is declared.
+ StmtResult TempStmt = new (Context)
+ DeclStmt(DeclGroupRef(TempDecl), SourceLocation(), SourceLocation());
+
+ if (!TempStmt.isUsable())
+ return StmtError();
+ PreInits.push_back(TempStmt.get());
+
+ // Build MaxExpr <-(MaxExpr > NIRef ? MaxExpr : NIRef)
+ ExprResult Comparison =
+ SemaRef.BuildBinOp(nullptr, SourceLocation(), BO_GT, TempRef, NIRef);
+ // Handle any errors in Comparison creation
+ if (!Comparison.isUsable())
+ return StmtError();
+
+ DeclRefExpr *NIRef2 = MakeVarDeclRef(NIVarDecls[J]);
+ // Update MaxExpr using a conditional expression to hold the max value
+ MaxExpr = new (Context) ConditionalOperator(
+ Comparison.get(), SourceLocation(), TempRef2, SourceLocation(),
+ NIRef2->getExprStmt(), NITy, VK_LValue, OK_Ordinary);
+
+ if (!MaxExpr.isUsable())
+ return StmtError();
+ }
+ }
+ if (!MaxExpr.isUsable())
+ return StmtError();
+
+ // 3. Declare the max variable
+ const std::string MaxName = Twine(".omp.fuse.max").str();
+ VarDecl *MaxDecl =
+ buildVarDecl(SemaRef, {}, IVType, MaxName, nullptr, nullptr);
+ MaxDecl->setInit(MaxExpr.get());
+ DeclRefExpr *MaxRef = buildDeclRefExpr(SemaRef, MaxDecl, IVType, {}, false);
+ StmtResult MaxStmt = new (Context)
+ DeclStmt(DeclGroupRef(MaxDecl), SourceLocation(), SourceLocation());
+
+ if (MaxStmt.isInvalid())
+ return StmtError();
+ PreInits.push_back(MaxStmt.get());
+
+ // 4. Create condition Expr: index < n_max
+ ExprResult CondExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT,
+ MakeIVRef(), MaxRef);
+ if (!CondExpr.isUsable())
+ return StmtError();
+
+ // 5. Increment Expr: ++index
+ ExprResult IncrExpr =
+ SemaRef.BuildUnaryOp(CurScope, SourceLocation(), UO_PreInc, MakeIVRef());
+ if (!IncrExpr.isUsable())
+ return StmtError();
+
+ // 6. Build the Fused Loop Body
+ // The final fused loop iterates over the maximum logical range. Inside the
+ // loop, each original loop's index is calculated dynamically, and its body
+ // is executed conditionally.
+ //
+ // Each sub-loop's body is guarded by a conditional statement to ensure
+ // it executes only within its logical iteration range:
+ //
+ // if (fuse.index < ni_k){
+ // iv_k = lb_k + st_k * fuse.index;
+ // original.index = iv_k
+ // body(k);
+ // }
+
+ CompoundStmt *FusedBody = nullptr;
+ SmallVector<Stmt *, 4> FusedBodyStmts;
+ for (unsigned I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) {
+    // Assignment of the original sub-loop index to compute the logical index
+ // IV_k = LB_k + omp.fuse.index * ST_k
+ ExprResult IdxExpr =
+ SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Mul,
+ MakeVarDeclRef(STVarDecls[J]), MakeIVRef());
+ if (!IdxExpr.isUsable())
+ return StmtError();
+ IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Add,
+ MakeVarDeclRef(LBVarDecls[J]), IdxExpr.get());
+
+ if (!IdxExpr.isUsable())
+ return StmtError();
+ IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Assign,
+ MakeVarDeclRef(IVVarDecls[J]), IdxExpr.get());
+ if (!IdxExpr.isUsable())
+ return StmtError();
+
+ // Update the original i_k = IV_k
+ SmallVector<Stmt *, 4> BodyStmts;
+ BodyStmts.push_back(IdxExpr.get());
+ llvm::append_range(BodyStmts, SeqAnalysis.Loops[I].HelperExprs.Updates);
+
+ // If the loop is a CXXForRangeStmt then the iterator variable is needed
+ if (auto *SourceCXXFor =
+ dyn_cast<CXXForRangeStmt>(SeqAnalysis.Loops[I].TheForStmt))
+ BodyStmts.push_back(SourceCXXFor->getLoopVarStmt());
+
+ Stmt *Body =
+ (isa<ForStmt>(SeqAnalysis.Loops[I].TheForStmt))
+ ? cast<ForStmt>(SeqAnalysis.Loops[I].TheForStmt)->getBody()
+ : cast<CXXForRangeStmt>(SeqAnalysis.Loops[I].TheForStmt)->getBody();
+ BodyStmts.push_back(Body);
+
+ CompoundStmt *CombinedBody =
+ CompoundStmt::Create(Context, BodyStmts, FPOptionsOverride(),
+ SourceLocation(), SourceLocation());
+ ExprResult Condition =
+ SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT, MakeIVRef(),
+ MakeVarDeclRef(NIVarDecls[J]));
+
+ if (!Condition.isUsable())
+ return StmtError();
+
+ IfStmt *IfStatement = IfStmt::Create(
+ Context, SourceLocation(), IfStatementKind::Ordinary, nullptr, nullptr,
+ Condition.get(), SourceLocation(), SourceLocation(), CombinedBody,
+ SourceLocation(), nullptr);
+
+ FusedBodyStmts.push_back(IfStatement);
+ }
+ FusedBody = CompoundStmt::Create(Context, FusedBodyStmts, FPOptionsOverride(),
+ SourceLocation(), SourceLocation());
+
+ // 7. Construct the final fused loop
+ ForStmt *FusedForStmt = new (Context)
+ ForStmt(Context, InitStmt.get(), CondExpr.get(), nullptr, IncrExpr.get(),
+ FusedBody, InitStmt.get()->getBeginLoc(), SourceLocation(),
+ IncrExpr.get()->getEndLoc());
+
+  // In the case of looprange, the result of fuse won't simply
+  // be a single loop (ForStmt), but rather a loop sequence
+  // (CompoundStmt) of 3 parts: the pre-fusion loops, the fused loop,
+  // and the post-fusion loops, preserving their original order.
+  //
+  // Note: if the looprange clause produces a single fused loop nest, then
+  // this compound statement wrapper is unnecessary and this
+  // treatment is skipped.
+
+ Stmt *FusionStmt = FusedForStmt;
+ if (LRC && CountVal != SeqAnalysis.LoopSeqSize) {
+ SmallVector<Stmt *, 4> FinalLoops;
+
+ // Reset the transform index
+ TransformIndex = 0;
+
+ // Collect all non-fused loops before and after the fused region.
+ // Pre-fusion and post-fusion loops are inserted in order exploiting their
+ // symmetry, along with their corresponding transformation pre-inits if
+ // needed. The fused loop is added between the two regions.
+ for (unsigned I : llvm::seq<unsigned>(SeqAnalysis.LoopSeqSize)) {
+ if (I >= FirstVal - 1 && I < FirstVal + CountVal - 1) {
+ // Update the Transformation counter to skip already treated
+ // loop transformations
+ if (!SeqAnalysis.Loops[I].isLoopTransformation())
+ ++TransformIndex;
+ continue;
+ }
+
+ // No need to handle:
+ // Regular loops: they are kept intact as-is.
+ // Loop-sequence-generating transformations: already handled earlier.
+      // Only single-loop-generating transformations require inserting pre-inits here.
+ if (SeqAnalysis.Loops[I].isRegularLoop()) {
+ const auto &TransformPreInit =
+ SeqAnalysis.Loops[TransformIndex++].TransformsPreInits;
+ if (!TransformPreInit.empty())
+ llvm::append_range(PreInits, TransformPreInit);
+ }
+
+ FinalLoops.push_back(SeqAnalysis.Loops[I].TheForStmt);
+ }
+
+ FinalLoops.insert(FinalLoops.begin() + (FirstVal - 1), FusedForStmt);
+ FusionStmt = CompoundStmt::Create(Context, FinalLoops, FPOptionsOverride(),
+ SourceLocation(), SourceLocation());
+ }
+ return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses,
+ NumGeneratedTopLevelLoops, AStmt, FusionStmt,
+ buildPreInits(Context, PreInits));
+}
+
OMPClause *SemaOpenMP::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind,
Expr *Expr,
SourceLocation StartLoc,
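Putting the pieces together, a complete fusion of two canonical loops yields the single loop described by the shape comment in ActOnOpenMPFuseDirective. The sketch below shows the approximate source-level effect, with the .omp.* helper variables written as ordinary locals for readability; it illustrates the intended semantics, not the literal AST that is built:

  void fused_equivalent(int n, int m, int *a, int *b) {
    // Approximate expansion of:
    //   #pragma omp fuse
    //   {
    //     for (int i = 0; i < n; ++i) a[i] = i;
    //     for (int j = 0; j < m; ++j) b[j] = j;
    //   }
    int ni0 = n, ni1 = m;                  // per-loop trip counts (.omp.ni*)
    int fuse_max = ni0 > ni1 ? ni0 : ni1;  // .omp.fuse.max
    for (int fuse_index = 0; fuse_index < fuse_max; ++fuse_index) {
      if (fuse_index < ni0) { int i = 0 + 1 * fuse_index; a[i] = i; }
      if (fuse_index < ni1) { int j = 0 + 1 * fuse_index; b[j] = j; }
    }
  }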
@@ -16887,6 +17637,31 @@ OMPClause *SemaOpenMP::ActOnOpenMPPartialClause(Expr *FactorExpr,
FactorExpr);
}
+OMPClause *SemaOpenMP::ActOnOpenMPLoopRangeClause(
+ Expr *First, Expr *Count, SourceLocation StartLoc, SourceLocation LParenLoc,
+ SourceLocation FirstLoc, SourceLocation CountLoc, SourceLocation EndLoc) {
+
+ // OpenMP [6.0, Restrictions]
+  // First and Count must be integer expressions with positive values.
+ ExprResult FirstVal =
+ VerifyPositiveIntegerConstantInClause(First, OMPC_looprange);
+ if (FirstVal.isInvalid())
+ First = nullptr;
+
+ ExprResult CountVal =
+ VerifyPositiveIntegerConstantInClause(Count, OMPC_looprange);
+ if (CountVal.isInvalid())
+ Count = nullptr;
+
+ // OpenMP [6.0, Restrictions]
+ // first + count - 1 must not evaluate to a value greater than the
+ // loop sequence length of the associated canonical loop sequence.
+ // This check must be performed afterwards due to the delayed
+ // parsing and computation of the associated loop sequence
+ return OMPLoopRangeClause::Create(getASTContext(), StartLoc, LParenLoc,
+ FirstLoc, CountLoc, EndLoc, First, Count);
+}
+
OMPClause *SemaOpenMP::ActOnOpenMPAlignClause(Expr *A, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
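ActOnOpenMPLoopRangeClause only verifies that first and count are positive integer constants; the "first + count - 1 must not exceed the loop sequence length" restriction and the redundancy warning are enforced later, in ActOnOpenMPFuseDirective, once the sequence size is known. A hedged sketch of the three situations:

  void ranges(int n, double *a, double *b) {
  #pragma omp fuse looprange(2, 2)   // OK: 2 + 2 - 1 == 3 == sequence length
    {
      for (int i = 0; i < n; ++i) a[i] += 1.0;
      for (int j = 0; j < n; ++j) b[j] += 2.0;
      for (int k = 0; k < n; ++k) a[k] += b[k];
    }

  #pragma omp fuse looprange(3, 2)   // error: 3 + 2 - 1 > 3 (err_omp_invalid_looprange)
    {
      for (int i = 0; i < n; ++i) a[i] += 1.0;
      for (int j = 0; j < n; ++j) b[j] += 2.0;
      for (int k = 0; k < n; ++k) a[k] += b[k];
    }

  #pragma omp fuse looprange(1, 1)   // warning: redundant fusion (warn_omp_redundant_fusion)
    {
      for (int i = 0; i < n; ++i) a[i] += 1.0;
      for (int j = 0; j < n; ++j) b[j] += 2.0;
    }
  }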
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index 3ebbb30..2bf1511 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -7102,7 +7102,7 @@ ExprResult Sema::CheckTemplateArgument(NamedDecl *Param, QualType ParamType,
// If the parameter type somehow involves auto, deduce the type now.
DeducedType *DeducedT = ParamType->getContainedDeducedType();
- bool IsDeduced = DeducedT && !DeducedT->isDeduced();
+ bool IsDeduced = DeducedT && DeducedT->getDeducedType().isNull();
if (IsDeduced) {
// When checking a deduced template argument, deduce from its type even if
// the type is dependent, in order to check the types of non-type template
diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp
index 1ff94d7..f1c9c5c 100644
--- a/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -616,29 +616,30 @@ Sema::InstantiatingTemplate::InstantiatingTemplate(
Invalid = true;
return;
}
- Invalid = CheckInstantiationDepth(PointOfInstantiation, InstantiationRange);
+
+ CodeSynthesisContext Inst;
+ Inst.Kind = Kind;
+ Inst.PointOfInstantiation = PointOfInstantiation;
+ Inst.Entity = Entity;
+ Inst.Template = Template;
+ Inst.TemplateArgs = TemplateArgs.data();
+ Inst.NumTemplateArgs = TemplateArgs.size();
+ Inst.DeductionInfo = DeductionInfo;
+ Inst.InstantiationRange = InstantiationRange;
+ Inst.InConstraintSubstitution =
+ Inst.Kind == CodeSynthesisContext::ConstraintSubstitution;
+ if (!SemaRef.CodeSynthesisContexts.empty())
+ Inst.InConstraintSubstitution |=
+ SemaRef.CodeSynthesisContexts.back().InConstraintSubstitution;
+
+ Invalid = SemaRef.pushCodeSynthesisContext(Inst);
if (!Invalid) {
- CodeSynthesisContext Inst;
- Inst.Kind = Kind;
- Inst.PointOfInstantiation = PointOfInstantiation;
- Inst.Entity = Entity;
- Inst.Template = Template;
- Inst.TemplateArgs = TemplateArgs.data();
- Inst.NumTemplateArgs = TemplateArgs.size();
- Inst.DeductionInfo = DeductionInfo;
- Inst.InstantiationRange = InstantiationRange;
- Inst.InConstraintSubstitution =
- Inst.Kind == CodeSynthesisContext::ConstraintSubstitution;
- if (!SemaRef.CodeSynthesisContexts.empty())
- Inst.InConstraintSubstitution |=
- SemaRef.CodeSynthesisContexts.back().InConstraintSubstitution;
-
- SemaRef.pushCodeSynthesisContext(Inst);
-
- AlreadyInstantiating = !Inst.Entity ? false :
- !SemaRef.InstantiatingSpecializations
- .insert({Inst.Entity->getCanonicalDecl(), Inst.Kind})
- .second;
+ AlreadyInstantiating =
+ !Inst.Entity
+ ? false
+ : !SemaRef.InstantiatingSpecializations
+ .insert({Inst.Entity->getCanonicalDecl(), Inst.Kind})
+ .second;
atTemplateBegin(SemaRef.TemplateInstCallbacks, SemaRef, Inst);
}
}
@@ -834,18 +835,34 @@ Sema::InstantiatingTemplate::InstantiatingTemplate(
: InstantiatingTemplate(SemaRef, CodeSynthesisContext::PartialOrderingTTP,
ArgLoc, InstantiationRange, PArg) {}
-void Sema::pushCodeSynthesisContext(CodeSynthesisContext Ctx) {
+bool Sema::pushCodeSynthesisContext(CodeSynthesisContext Ctx) {
Ctx.SavedInNonInstantiationSFINAEContext = InNonInstantiationSFINAEContext;
InNonInstantiationSFINAEContext = false;
- CodeSynthesisContexts.push_back(Ctx);
-
- if (!Ctx.isInstantiationRecord())
+ if (!Ctx.isInstantiationRecord()) {
++NonInstantiationEntries;
+ } else {
+    assert(NonInstantiationEntries <= CodeSynthesisContexts.size());
+    if ((CodeSynthesisContexts.size() - NonInstantiationEntries) >
+        getLangOpts().InstantiationDepth) {
+      Diag(Ctx.PointOfInstantiation,
+           diag::err_template_recursion_depth_exceeded)
+          << getLangOpts().InstantiationDepth << Ctx.InstantiationRange;
+      Diag(Ctx.PointOfInstantiation, diag::note_template_recursion_depth)
+          << getLangOpts().InstantiationDepth;
+ return true;
+ }
+ }
+
+ CodeSynthesisContexts.push_back(Ctx);
// Check to see if we're low on stack space. We can't do anything about this
// from here, but we can at least warn the user.
StackHandler.warnOnStackNearlyExhausted(Ctx.PointOfInstantiation);
+ return false;
}
void Sema::popCodeSynthesisContext() {
@@ -907,25 +924,6 @@ static std::string convertCallArgsToString(Sema &S,
return Result;
}
-bool Sema::InstantiatingTemplate::CheckInstantiationDepth(
- SourceLocation PointOfInstantiation,
- SourceRange InstantiationRange) {
- assert(SemaRef.NonInstantiationEntries <=
- SemaRef.CodeSynthesisContexts.size());
- if ((SemaRef.CodeSynthesisContexts.size() -
- SemaRef.NonInstantiationEntries)
- <= SemaRef.getLangOpts().InstantiationDepth)
- return false;
-
- SemaRef.Diag(PointOfInstantiation,
- diag::err_template_recursion_depth_exceeded)
- << SemaRef.getLangOpts().InstantiationDepth
- << InstantiationRange;
- SemaRef.Diag(PointOfInstantiation, diag::note_template_recursion_depth)
- << SemaRef.getLangOpts().InstantiationDepth;
- return true;
-}
-
void Sema::PrintInstantiationStack(InstantiationContextDiagFuncRef DiagFunc) {
// Determine which template instantiations to skip, if any.
unsigned SkipStart = CodeSynthesisContexts.size(), SkipEnd = SkipStart;
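The depth check that used to live in InstantiatingTemplate::CheckInstantiationDepth now runs inside pushCodeSynthesisContext, which reports failure through its new bool result; the user-visible diagnostics should be unchanged. For reference, the kind of code that trips it with the default -ftemplate-depth of 1024 (illustrative template names):

  template <int N> struct Depth {
    static constexpr int value = Depth<N - 1>::value + 1;
  };
  template <> struct Depth<0> { static constexpr int value = 0; };

  // Requires ~2000 nested instantiations: err_template_recursion_depth_exceeded,
  // followed by the note suggesting -ftemplate-depth=N.
  int x = Depth<2000>::value;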
diff --git a/clang/lib/Sema/SemaTypeTraits.cpp b/clang/lib/Sema/SemaTypeTraits.cpp
index c2427dcf..6c798d6 100644
--- a/clang/lib/Sema/SemaTypeTraits.cpp
+++ b/clang/lib/Sema/SemaTypeTraits.cpp
@@ -1163,13 +1163,16 @@ static bool EvaluateUnaryTypeTrait(Sema &Self, TypeTrait UTT,
// - it has at least one trivial eligible constructor and a trivial,
// non-deleted destructor.
const CXXDestructorDecl *Dtor = RD->getDestructor();
- if (UnqualT->isAggregateType())
- if (Dtor && !Dtor->isUserProvided())
- return true;
- if (RD->hasTrivialDestructor() && (!Dtor || !Dtor->isDeleted()))
- if (RD->hasTrivialDefaultConstructor() ||
- RD->hasTrivialCopyConstructor() || RD->hasTrivialMoveConstructor())
- return true;
+ if (UnqualT->isAggregateType() && (!Dtor || !Dtor->isUserProvided()))
+ return true;
+ if (RD->hasTrivialDestructor() && (!Dtor || !Dtor->isDeleted())) {
+ for (CXXConstructorDecl *Ctr : RD->ctors()) {
+ if (Ctr->isIneligibleOrNotSelected() || Ctr->isDeleted())
+ continue;
+ if (Ctr->isTrivial())
+ return true;
+ }
+ }
return false;
}
case UTT_IsIntangibleType:
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 0214078..6967301 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -1783,6 +1783,14 @@ public:
LParenLoc, EndLoc);
}
+ OMPClause *
+ RebuildOMPLoopRangeClause(Expr *First, Expr *Count, SourceLocation StartLoc,
+ SourceLocation LParenLoc, SourceLocation FirstLoc,
+ SourceLocation CountLoc, SourceLocation EndLoc) {
+ return getSema().OpenMP().ActOnOpenMPLoopRangeClause(
+ First, Count, StartLoc, LParenLoc, FirstLoc, CountLoc, EndLoc);
+ }
+
/// Build a new OpenMP 'allocator' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
@@ -9609,6 +9617,17 @@ StmtResult TreeTransform<Derived>::TransformOMPInterchangeDirective(
template <typename Derived>
StmtResult
+TreeTransform<Derived>::TransformOMPFuseDirective(OMPFuseDirective *D) {
+ DeclarationNameInfo DirName;
+ getDerived().getSema().OpenMP().StartOpenMPDSABlock(
+ D->getDirectiveKind(), DirName, nullptr, D->getBeginLoc());
+ StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
+ getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get());
+ return Res;
+}
+
+template <typename Derived>
+StmtResult
TreeTransform<Derived>::TransformOMPForDirective(OMPForDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().OpenMP().StartOpenMPDSABlock(
@@ -10502,6 +10521,31 @@ TreeTransform<Derived>::TransformOMPPartialClause(OMPPartialClause *C) {
template <typename Derived>
OMPClause *
+TreeTransform<Derived>::TransformOMPLoopRangeClause(OMPLoopRangeClause *C) {
+ ExprResult F = getDerived().TransformExpr(C->getFirst());
+ if (F.isInvalid())
+ return nullptr;
+
+ ExprResult Cn = getDerived().TransformExpr(C->getCount());
+ if (Cn.isInvalid())
+ return nullptr;
+
+ Expr *First = F.get();
+ Expr *Count = Cn.get();
+
+ bool Changed = (First != C->getFirst()) || (Count != C->getCount());
+
+ // If no changes and AlwaysRebuild() is false, return the original clause
+ if (!Changed && !getDerived().AlwaysRebuild())
+ return C;
+
+ return RebuildOMPLoopRangeClause(First, Count, C->getBeginLoc(),
+ C->getLParenLoc(), C->getFirstLoc(),
+ C->getCountLoc(), C->getEndLoc());
+}
+
+template <typename Derived>
+OMPClause *
TreeTransform<Derived>::TransformOMPCollapseClause(OMPCollapseClause *C) {
ExprResult E = getDerived().TransformExpr(C->getNumForLoops());
if (E.isInvalid())
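The TreeTransform support matters when the clause arguments are value-dependent: inside a template the looprange bounds can name template parameters, so the clause (and the fuse directive, whose transformation is deferred for dependent contexts earlier in this patch) is rebuilt at instantiation time. A hedged sketch, assuming value-dependent arguments are accepted until instantiation, as the transform path suggests:

  template <int First, int Count>
  void fuse_range(int n, double *a, double *b) {
  #pragma omp fuse looprange(First, Count)   // dependent; rebuilt per instantiation
    {
      for (int i = 0; i < n; ++i) a[i] += 1.0;
      for (int j = 0; j < n; ++j) b[j] *= 2.0;
    }
  }

  void use(int n, double *a, double *b) {
    fuse_range<1, 2>(n, a, b);   // arguments become integer constants here
  }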
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 9ee8a0f..c05e428 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -11215,6 +11215,9 @@ OMPClause *OMPClauseReader::readClause() {
case llvm::omp::OMPC_partial:
C = OMPPartialClause::CreateEmpty(Context);
break;
+ case llvm::omp::OMPC_looprange:
+ C = OMPLoopRangeClause::CreateEmpty(Context);
+ break;
case llvm::omp::OMPC_allocator:
C = new (Context) OMPAllocatorClause();
break;
@@ -11618,6 +11621,14 @@ void OMPClauseReader::VisitOMPPartialClause(OMPPartialClause *C) {
C->setLParenLoc(Record.readSourceLocation());
}
+void OMPClauseReader::VisitOMPLoopRangeClause(OMPLoopRangeClause *C) {
+ C->setFirst(Record.readSubExpr());
+ C->setCount(Record.readSubExpr());
+ C->setLParenLoc(Record.readSourceLocation());
+ C->setFirstLoc(Record.readSourceLocation());
+ C->setCountLoc(Record.readSourceLocation());
+}
+
void OMPClauseReader::VisitOMPAllocatorClause(OMPAllocatorClause *C) {
C->setAllocator(Record.readExpr());
C->setLParenLoc(Record.readSourceLocation());
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 213c2c2..70b898a 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -2469,10 +2469,21 @@ void ASTStmtReader::VisitOMPReverseDirective(OMPReverseDirective *D) {
VisitOMPCanonicalLoopNestTransformationDirective(D);
}
+void ASTStmtReader::VisitOMPCanonicalLoopSequenceTransformationDirective(
+ OMPCanonicalLoopSequenceTransformationDirective *D) {
+ VisitStmt(D);
+ VisitOMPExecutableDirective(D);
+ D->setNumGeneratedTopLevelLoops(Record.readUInt32());
+}
+
void ASTStmtReader::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) {
VisitOMPCanonicalLoopNestTransformationDirective(D);
}
+void ASTStmtReader::VisitOMPFuseDirective(OMPFuseDirective *D) {
+ VisitOMPCanonicalLoopSequenceTransformationDirective(D);
+}
+
void ASTStmtReader::VisitOMPForDirective(OMPForDirective *D) {
VisitOMPLoopDirective(D);
D->setHasCancel(Record.readBool());
@@ -3615,6 +3626,12 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
break;
}
+ case STMT_OMP_FUSE_DIRECTIVE: {
+ unsigned NumClauses = Record[ASTStmtReader::NumStmtFields];
+ S = OMPFuseDirective::CreateEmpty(Context, NumClauses);
+ break;
+ }
+
case STMT_OMP_INTERCHANGE_DIRECTIVE: {
unsigned NumLoops = Record[ASTStmtReader::NumStmtFields];
unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1];
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 09859da..cdf95ba 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -7882,6 +7882,14 @@ void OMPClauseWriter::VisitOMPPartialClause(OMPPartialClause *C) {
Record.AddSourceLocation(C->getLParenLoc());
}
+void OMPClauseWriter::VisitOMPLoopRangeClause(OMPLoopRangeClause *C) {
+ Record.AddStmt(C->getFirst());
+ Record.AddStmt(C->getCount());
+ Record.AddSourceLocation(C->getLParenLoc());
+ Record.AddSourceLocation(C->getFirstLoc());
+ Record.AddSourceLocation(C->getCountLoc());
+}
+
void OMPClauseWriter::VisitOMPAllocatorClause(OMPAllocatorClause *C) {
Record.AddStmt(C->getAllocator());
Record.AddSourceLocation(C->getLParenLoc());
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index 21c04dd..ebda91e 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -2487,6 +2487,18 @@ void ASTStmtWriter::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) {
Code = serialization::STMT_OMP_INTERCHANGE_DIRECTIVE;
}
+void ASTStmtWriter::VisitOMPCanonicalLoopSequenceTransformationDirective(
+ OMPCanonicalLoopSequenceTransformationDirective *D) {
+ VisitStmt(D);
+ VisitOMPExecutableDirective(D);
+ Record.writeUInt32(D->getNumGeneratedTopLevelLoops());
+}
+
+void ASTStmtWriter::VisitOMPFuseDirective(OMPFuseDirective *D) {
+ VisitOMPCanonicalLoopSequenceTransformationDirective(D);
+ Code = serialization::STMT_OMP_FUSE_DIRECTIVE;
+}
+
void ASTStmtWriter::VisitOMPForDirective(OMPForDirective *D) {
VisitOMPLoopDirective(D);
Record.writeBool(D->hasCancel());
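With the reader and writer hooks in place, fuse directives and looprange clauses can round-trip through a PCH or module file. A sketch of the usual AST-print round-trip test pattern (hypothetical test file; exact flags, e.g. -fopenmp-version=60, are assumptions):

  // RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -emit-pch -o %t %s
  // RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -include-pch %t -ast-print %s | FileCheck %s
  #ifndef HEADER
  #define HEADER
  void round_trip(int n, double *a, double *b) {
  #pragma omp fuse looprange(1, 2)
    {
      for (int i = 0; i < n; ++i) a[i] += 1.0;
      for (int j = 0; j < n; ++j) b[j] *= 2.0;
    }
  }
  #endif
  // CHECK: #pragma omp fuse looprange(1, 2)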
diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index 36f316d..0ae784c 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -672,6 +672,10 @@ ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
ProgramStateRef stateTrue, stateFalse;
+ if (!First.Expression->getType()->isAnyPointerType() ||
+ !Second.Expression->getType()->isAnyPointerType())
+ return state;
+
// Assume different address spaces cannot overlap.
if (First.Expression->getType()->getPointeeType().getAddressSpace() !=
Second.Expression->getType()->getPointeeType().getAddressSpace())
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 785cdfa..4e472b7 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1814,6 +1814,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
case Stmt::OMPStripeDirectiveClass:
case Stmt::OMPTileDirectiveClass:
case Stmt::OMPInterchangeDirectiveClass:
+ case Stmt::OMPFuseDirectiveClass:
case Stmt::OMPInteropDirectiveClass:
case Stmt::OMPDispatchDirectiveClass:
case Stmt::OMPMaskedDirectiveClass:
diff --git a/clang/test/AST/ByteCode/cxx23.cpp b/clang/test/AST/ByteCode/cxx23.cpp
index 72c751d..ce0a4777 100644
--- a/clang/test/AST/ByteCode/cxx23.cpp
+++ b/clang/test/AST/ByteCode/cxx23.cpp
@@ -1,8 +1,8 @@
// UNSUPPORTED: target={{.*}}-zos{{.*}}
-// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify=ref,ref20,all,all20 %s
-// RUN: %clang_cc1 -std=c++23 -fsyntax-only -fcxx-exceptions -verify=ref,ref23,all,all23 %s
-// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify=expected20,all,all20 %s -fexperimental-new-constant-interpreter
-// RUN: %clang_cc1 -std=c++23 -fsyntax-only -fcxx-exceptions -verify=expected23,all,all23 %s -fexperimental-new-constant-interpreter
+// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -Wno-deprecated-volatile -verify=ref,ref20,all,all20 %s
+// RUN: %clang_cc1 -std=c++23 -fsyntax-only -fcxx-exceptions -Wno-deprecated-volatile -verify=ref,ref23,all,all23 %s
+// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -Wno-deprecated-volatile -verify=expected20,all,all20 %s -fexperimental-new-constant-interpreter
+// RUN: %clang_cc1 -std=c++23 -fsyntax-only -fcxx-exceptions -Wno-deprecated-volatile -verify=expected23,all,all23 %s -fexperimental-new-constant-interpreter
#define assert_active(F) if (!__builtin_is_within_lifetime(&F)) (1/0);
@@ -393,6 +393,59 @@ namespace UnionMemberCallDiags {
static_assert(g()); // all-error {{not an integral constant expression}} \
// all-note {{in call to}}
}
+#endif
+
+namespace VolatileWrites {
+ constexpr void test1() {// all20-error {{never produces a constant expression}}
+ int k;
+ volatile int &m = k;
+ m = 10; // all20-note {{assignment to volatile-qualified type 'volatile int'}}
+ }
+ constexpr void test2() { // all20-error {{never produces a constant expression}}
+ volatile int k = 12;
+ k = 13; // all20-note {{assignment to volatile-qualified type 'volatile int'}}
+ }
+
+ constexpr void test3() { // all20-error {{never produces a constant expression}}
+ volatile int k = 12; // all20-note {{volatile object declared here}}
+
+ *((int *)&k) = 13; // all20-note {{assignment to volatile object 'k' is not allowed in a constant expression}}
+ }
+
+ constexpr void test4() { // all20-error {{never produces a constant expression}}
+ int k = 12;
+
+ *((volatile int *)&k) = 13; // all20-note {{assignment to volatile-qualified type 'volatile int' is not allowed in a constant expression}}
+ }
+
+#if __cplusplus >= 202302L
+ struct S {
+ volatile int k;
+ };
+ constexpr int test5() {
+ S s;
+ s.k = 12; // all-note {{assignment to volatile-qualified type 'volatile int' is not}}
+
+ return 0;
+ }
+ static_assert(test5() == 0); // all-error{{not an integral constant expression}} \
+ // all-note {{in call to}}
#endif
+
+ constexpr bool test6(volatile int k) { // ref20-error {{never produces a constant expression}}
+ k = 14; // ref20-note {{assignment to volatile-qualified type 'volatile int' is not}} \
+ // all-note {{assignment to volatile-qualified type 'volatile int' is not}}
+ return true;
+ }
+ static_assert(test6(5)); // all-error {{not an integral constant expression}} \
+ // all-note {{in call to}}
+
+ constexpr bool test7(volatile int k) { // all-note {{declared here}}
+ *((int *)&k) = 13; // all-note {{assignment to volatile object 'k' is not allowed in a constant expression}}
+ return true;
+ }
+ static_assert(test7(12)); // all-error {{not an integral constant expression}} \
+ // all-note {{in call to}}
+}
diff --git a/clang/test/AST/ByteCode/invalid.cpp b/clang/test/AST/ByteCode/invalid.cpp
index affb40ea..00db274 100644
--- a/clang/test/AST/ByteCode/invalid.cpp
+++ b/clang/test/AST/ByteCode/invalid.cpp
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 -fcxx-exceptions -std=c++20 -fexperimental-new-constant-interpreter -verify=expected,both %s
-// RUN: %clang_cc1 -fcxx-exceptions -std=c++20 -verify=ref,both %s
+// RUN: %clang_cc1 -fcxx-exceptions -std=c++20 -verify=ref,both %s
namespace Throw {
diff --git a/clang/test/Analysis/buffer-overlap-decls.c b/clang/test/Analysis/buffer-overlap-decls.c
new file mode 100644
index 0000000..4830f4e
--- /dev/null
+++ b/clang/test/Analysis/buffer-overlap-decls.c
@@ -0,0 +1,23 @@
+// RUN: %clang_analyze_cc1 -verify %s -Wno-incompatible-library-redeclaration \
+// RUN: -analyzer-checker=alpha.unix.cstring.BufferOverlap
+// expected-no-diagnostics
+
+typedef typeof(sizeof(int)) size_t;
+
+void memcpy(int dst, int src, size_t size);
+
+void test_memcpy_proxy() {
+ memcpy(42, 42, 42); // no-crash
+}
+
+void strcpy(int dst, char *src);
+
+void test_strcpy_proxy() {
+ strcpy(42, (char *)42); // no-crash
+}
+
+void strxfrm(int dst, char *src, size_t size);
+
+void test_strxfrm_proxy() {
+ strxfrm(42, (char *)42, 42); // no-crash
+}
diff --git a/clang/test/Analysis/buffer-overlap.c b/clang/test/Analysis/buffer-overlap.c
index 8414a76..defb17a 100644
--- a/clang/test/Analysis/buffer-overlap.c
+++ b/clang/test/Analysis/buffer-overlap.c
@@ -96,3 +96,10 @@ void test_snprintf6() {
char b[4] = {0};
snprintf(a, sizeof(a), "%s", b); // no-warning
}
+
+void* memcpy(void* dest, const void* src, size_t count);
+
+void test_memcpy_esoteric() {
+label:
+ memcpy((char *)&&label, (const char *)memcpy, 1);
+}
diff --git a/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-file-diagnostics.c.sarif b/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-file-diagnostics.c.sarif
index 85e710f..501d27c 100644
--- a/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-file-diagnostics.c.sarif
+++ b/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-file-diagnostics.c.sarif
@@ -141,4 +141,4 @@
}
],
"version": "[SARIF version]"
-}
\ No newline at end of file
+}
diff --git a/clang/test/Analysis/lit.local.cfg b/clang/test/Analysis/lit.local.cfg
index 3d60a16..03ab418 100644
--- a/clang/test/Analysis/lit.local.cfg
+++ b/clang/test/Analysis/lit.local.cfg
@@ -17,11 +17,13 @@ config.substitutions.append(
)
)
+sed_cmd = "/opt/freeware/bin/sed" if "system-aix" in config.available_features else "sed"
+
# Filtering command for testing SARIF output against reference output.
config.substitutions.append(
(
"%normalize_sarif",
- "sed -r '%s;%s;%s;%s'"
+ f"{sed_cmd} -r '%s;%s;%s;%s'"
% (
# Replace version strings that are likely to change.
r's/"version": ".* version .*"/"version": "[clang version]"/',
diff --git a/clang/test/CIR/CodeGen/complex.cpp b/clang/test/CIR/CodeGen/complex.cpp
index e901631..4c396d3 100644
--- a/clang/test/CIR/CodeGen/complex.cpp
+++ b/clang/test/CIR/CodeGen/complex.cpp
@@ -1270,3 +1270,40 @@ void real_on_scalar_from_real_with_type_promotion() {
// OGCG: %[[A_REAL_F32:.*]] = fpext half %[[A_REAL]] to float
// OGCG: %[[A_REAL_F16:.*]] = fptrunc float %[[A_REAL_F32]] to half
// OGCG: store half %[[A_REAL_F16]], ptr %[[B_ADDR]], align 2
+
+void real_on_scalar_from_imag_with_type_promotion() {
+ _Float16 _Complex a;
+ _Float16 b = __real__(__imag__ a);
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.f16>, !cir.ptr<!cir.complex<!cir.f16>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["b", init]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.f16>>, !cir.complex<!cir.f16>
+// CIR: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
+// CIR: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
+// CIR: %[[A_REAL_F32:.*]] = cir.cast(floating, %[[A_REAL]] : !cir.f16), !cir.float
+// CIR: %[[A_IMAG_F32:.*]] = cir.cast(floating, %[[A_IMAG]] : !cir.f16), !cir.float
+// CIR: %[[A_COMPLEX_F32:.*]] = cir.complex.create %[[A_REAL_F32]], %[[A_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float>
+// CIR: %[[A_IMAG_F32:.*]] = cir.complex.imag %[[A_COMPLEX_F32]] : !cir.complex<!cir.float> -> !cir.float
+// CIR: %[[A_IMAG_F16:.*]] = cir.cast(floating, %[[A_IMAG_F32]] : !cir.float), !cir.f16
+// CIR: cir.store{{.*}} %[[A_IMAG_F16]], %[[B_ADDR]] : !cir.f16, !cir.ptr<!cir.f16>
+
+// LLVM: %[[A_ADDR:.*]] = alloca { half, half }, i64 1, align 2
+// LLVM: %[[B_ADDR:.*]] = alloca half, i64 1, align 2
+// LLVM: %[[TMP_A:.*]] = load { half, half }, ptr %[[A_ADDR]], align 2
+// LLVM: %[[A_REAL:.*]] = extractvalue { half, half } %[[TMP_A]], 0
+// LLVM: %[[A_IMAG:.*]] = extractvalue { half, half } %[[TMP_A]], 1
+// LLVM: %[[A_REAL_F32:.*]] = fpext half %[[A_REAL]] to float
+// LLVM: %[[A_IMAG_F32:.*]] = fpext half %[[A_IMAG]] to float
+// LLVM: %[[TMP_A_COMPLEX_F32:.*]] = insertvalue { float, float } {{.*}}, float %[[A_REAL_F32]], 0
+// LLVM: %[[A_COMPLEX_F32:.*]] = insertvalue { float, float } %[[TMP_A_COMPLEX_F32]], float %[[A_IMAG_F32]], 1
+// LLVM: %[[A_IMAG_F16:.*]] = fptrunc float %[[A_IMAG_F32]] to half
+// LLVM: store half %[[A_IMAG_F16]], ptr %[[B_ADDR]], align 2
+
+// OGCG: %[[A_ADDR:.*]] = alloca { half, half }, align 2
+// OGCG: %[[B_ADDR:.*]] = alloca half, align 2
+// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { half, half }, ptr %[[A_ADDR]], i32 0, i32 1
+// OGCG: %[[A_IMAG:.*]] = load half, ptr %[[A_IMAG_PTR]], align 2
+// OGCG: %[[A_IMAG_F32:.*]] = fpext half %[[A_IMAG]] to float
+// OGCG: %[[A_IMAG_F16:.*]] = fptrunc float %[[A_IMAG_F32]] to half
+// OGCG: store half %[[A_IMAG_F16]], ptr %[[B_ADDR]], align 2
diff --git a/clang/test/CIR/CodeGen/delete.cpp b/clang/test/CIR/CodeGen/delete.cpp
new file mode 100644
index 0000000..f21d203
--- /dev/null
+++ b/clang/test/CIR/CodeGen/delete.cpp
@@ -0,0 +1,88 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -mconstructor-aliases -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -mconstructor-aliases -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -mconstructor-aliases -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+typedef __typeof(sizeof(int)) size_t;
+
+struct SizedDelete {
+ void operator delete(void*, size_t);
+ int member;
+};
+void test_sized_delete(SizedDelete *x) {
+ delete x;
+}
+
+// SizedDelete::operator delete(void*, unsigned long)
+// CIR: cir.func private @_ZN11SizedDeletedlEPvm(!cir.ptr<!void>, !u64i)
+// LLVM: declare void @_ZN11SizedDeletedlEPvm(ptr, i64)
+
+// CIR: cir.func dso_local @_Z17test_sized_deleteP11SizedDelete
+// CIR: %[[X:.*]] = cir.load{{.*}} %{{.*}}
+// CIR: %[[X_CAST:.*]] = cir.cast(bitcast, %[[X]] : !cir.ptr<!rec_SizedDelete>), !cir.ptr<!void>
+// CIR: %[[OBJ_SIZE:.*]] = cir.const #cir.int<4> : !u64i
+// CIR: cir.call @_ZN11SizedDeletedlEPvm(%[[X_CAST]], %[[OBJ_SIZE]]) nothrow : (!cir.ptr<!void>, !u64i) -> ()
+
+// LLVM: define dso_local void @_Z17test_sized_deleteP11SizedDelete
+// LLVM: %[[X:.*]] = load ptr, ptr %{{.*}}
+// LLVM: call void @_ZN11SizedDeletedlEPvm(ptr %[[X]], i64 4)
+
+// OGCG: define dso_local void @_Z17test_sized_deleteP11SizedDelete
+// OGCG: %[[X:.*]] = load ptr, ptr %{{.*}}
+// OGCG: %[[ISNULL:.*]] = icmp eq ptr %[[X]], null
+// OGCG: br i1 %[[ISNULL]], label %{{.*}}, label %[[DELETE_NOTNULL:.*]]
+// OGCG: [[DELETE_NOTNULL]]:
+// OGCG: call void @_ZN11SizedDeletedlEPvm(ptr noundef %[[X]], i64 noundef 4)
+
+// This function is declared below the call in OGCG.
+// OGCG: declare void @_ZN11SizedDeletedlEPvm(ptr noundef, i64 noundef)
+
+struct Contents {
+ ~Contents() {}
+};
+struct Container {
+ Contents *contents;
+ ~Container();
+};
+Container::~Container() { delete contents; }
+
+// Contents::~Contents()
+// CIR: cir.func comdat linkonce_odr @_ZN8ContentsD2Ev
+// LLVM: define linkonce_odr void @_ZN8ContentsD2Ev
+
+// operator delete(void*, unsigned long)
+// CIR: cir.func private @_ZdlPvm(!cir.ptr<!void>, !u64i)
+// LLVM: declare void @_ZdlPvm(ptr, i64)
+
+// Container::~Container()
+// CIR: cir.func dso_local @_ZN9ContainerD2Ev
+// CIR: %[[THIS:.*]] = cir.load %{{.*}}
+// CIR: %[[CONTENTS_PTR_ADDR:.*]] = cir.get_member %[[THIS]][0] {name = "contents"} : !cir.ptr<!rec_Container> -> !cir.ptr<!cir.ptr<!rec_Contents>>
+// CIR: %[[CONTENTS_PTR:.*]] = cir.load{{.*}} %[[CONTENTS_PTR_ADDR]]
+// CIR: cir.call @_ZN8ContentsD2Ev(%[[CONTENTS_PTR]]) nothrow : (!cir.ptr<!rec_Contents>) -> ()
+// CIR: %[[CONTENTS_CAST:.*]] = cir.cast(bitcast, %[[CONTENTS_PTR]] : !cir.ptr<!rec_Contents>), !cir.ptr<!void>
+// CIR: %[[OBJ_SIZE:.*]] = cir.const #cir.int<1> : !u64i
+// CIR: cir.call @_ZdlPvm(%[[CONTENTS_CAST]], %[[OBJ_SIZE]]) nothrow : (!cir.ptr<!void>, !u64i) -> ()
+
+// LLVM: define dso_local void @_ZN9ContainerD2Ev
+// LLVM: %[[THIS:.*]] = load ptr, ptr %{{.*}}
+// LLVM: %[[CONTENTS_PTR_ADDR:.*]] = getelementptr %struct.Container, ptr %[[THIS]], i32 0, i32 0
+// LLVM: %[[CONTENTS_PTR:.*]] = load ptr, ptr %[[CONTENTS_PTR_ADDR]]
+// LLVM: call void @_ZN8ContentsD2Ev(ptr %[[CONTENTS_PTR]])
+// LLVM: call void @_ZdlPvm(ptr %[[CONTENTS_PTR]], i64 1)
+
+// OGCG: define dso_local void @_ZN9ContainerD2Ev
+// OGCG: %[[THIS:.*]] = load ptr, ptr %{{.*}}
+// OGCG: %[[CONTENTS:.*]] = getelementptr inbounds nuw %struct.Container, ptr %[[THIS]], i32 0, i32 0
+// OGCG: %[[CONTENTS_PTR:.*]] = load ptr, ptr %[[CONTENTS]]
+// OGCG: %[[ISNULL:.*]] = icmp eq ptr %[[CONTENTS_PTR]], null
+// OGCG: br i1 %[[ISNULL]], label %{{.*}}, label %[[DELETE_NOTNULL:.*]]
+// OGCG: [[DELETE_NOTNULL]]:
+// OGCG: call void @_ZN8ContentsD2Ev(ptr noundef nonnull align 1 dereferenceable(1) %[[CONTENTS_PTR]])
+// OGCG: call void @_ZdlPvm(ptr noundef %[[CONTENTS_PTR]], i64 noundef 1)
+
+// These functions are declared/defined below the calls in OGCG.
+// OGCG: define linkonce_odr void @_ZN8ContentsD2Ev
+// OGCG: declare void @_ZdlPvm(ptr noundef, i64 noundef)
diff --git a/clang/test/CIR/CodeGen/lang-c-cpp.cpp b/clang/test/CIR/CodeGen/lang-c-cpp.cpp
index e126932..8931783 100644
--- a/clang/test/CIR/CodeGen/lang-c-cpp.cpp
+++ b/clang/test/CIR/CodeGen/lang-c-cpp.cpp
@@ -3,8 +3,8 @@
// RUN: %clang_cc1 -x c -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.c.cir
// RUN: FileCheck --check-prefix=CIR-C --input-file=%t.c.cir %s
-// CIR-CPP: module attributes {{{.*}}cir.lang = #cir.lang<cxx>{{.*}}}
-// CIR-C: module attributes {{{.*}}cir.lang = #cir.lang<c>{{.*}}}
+// CIR-CPP: module{{.*}} attributes {{{.*}}cir.lang = #cir.lang<cxx>{{.*}}}
+// CIR-C: module{{.*}} attributes {{{.*}}cir.lang = #cir.lang<c>{{.*}}}
int main() {
return 0;
diff --git a/clang/test/CIR/CodeGen/module-filename.cpp b/clang/test/CIR/CodeGen/module-filename.cpp
new file mode 100644
index 0000000..05e2e92
--- /dev/null
+++ b/clang/test/CIR/CodeGen/module-filename.cpp
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// Normally, we try to avoid checking the filename of a test, but that's the
+// entire point of this test, so we use a wildcard for the path but check the
+// filename.
+// CIR: module @"{{.*}}module-filename.cpp"
+
+int main() {
+ return 0;
+}
diff --git a/clang/test/CIR/CodeGen/opt-info-attr.cpp b/clang/test/CIR/CodeGen/opt-info-attr.cpp
index 444286b..97071d7 100644
--- a/clang/test/CIR/CodeGen/opt-info-attr.cpp
+++ b/clang/test/CIR/CodeGen/opt-info-attr.cpp
@@ -13,10 +13,10 @@
void f() {}
-// CHECK-O0: module attributes
+// CHECK-O0: module{{.*}} attributes
// CHECK-O0-NOT: cir.opt_info
-// CHECK-O1: module attributes {{.+}}cir.opt_info = #cir.opt_info<level = 1, size = 0>{{.+}}
-// CHECK-O2: module attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 0>{{.+}}
-// CHECK-O3: module attributes {{.+}}cir.opt_info = #cir.opt_info<level = 3, size = 0>{{.+}}
-// CHECK-Os: module attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 1>{{.+}}
-// CHECK-Oz: module attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 2>{{.+}}
+// CHECK-O1: module{{.*}} attributes {{.+}}cir.opt_info = #cir.opt_info<level = 1, size = 0>{{.+}}
+// CHECK-O2: module{{.*}} attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 0>{{.+}}
+// CHECK-O3: module{{.*}} attributes {{.+}}cir.opt_info = #cir.opt_info<level = 3, size = 0>{{.+}}
+// CHECK-Os: module{{.*}} attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 1>{{.+}}
+// CHECK-Oz: module{{.*}} attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 2>{{.+}}
diff --git a/clang/test/CIR/CodeGen/vbase.cpp b/clang/test/CIR/CodeGen/vbase.cpp
index 9139651..4d57f8e 100644
--- a/clang/test/CIR/CodeGen/vbase.cpp
+++ b/clang/test/CIR/CodeGen/vbase.cpp
@@ -13,19 +13,29 @@ public:
class Derived : public virtual Base {};
-// This is just here to force the record types to be emitted.
void f() {
Derived d;
+ d.f();
+}
+
+class DerivedFinal final : public virtual Base {};
+
+void g() {
+ DerivedFinal df;
+ df.f();
}
// CIR: !rec_Base = !cir.record<class "Base" {!cir.vptr}>
// CIR: !rec_Derived = !cir.record<class "Derived" {!rec_Base}>
+// CIR: !rec_DerivedFinal = !cir.record<class "DerivedFinal" {!rec_Base}>
// LLVM: %class.Derived = type { %class.Base }
// LLVM: %class.Base = type { ptr }
+// LLVM: %class.DerivedFinal = type { %class.Base }
// OGCG: %class.Derived = type { %class.Base }
// OGCG: %class.Base = type { ptr }
+// OGCG: %class.DerivedFinal = type { %class.Base }
// Test the constructor handling for a class with a virtual base.
struct A {
@@ -47,6 +57,76 @@ void ppp() { B b; }
// OGCG: @_ZTV1B = linkonce_odr unnamed_addr constant { [3 x ptr] } { [3 x ptr] [ptr inttoptr (i64 12 to ptr), ptr null, ptr @_ZTI1B] }, comdat, align 8
+// CIR: cir.func {{.*}}@_Z1fv() {
+// CIR: %[[D:.+]] = cir.alloca !rec_Derived, !cir.ptr<!rec_Derived>, ["d", init]
+// CIR: cir.call @_ZN7DerivedC1Ev(%[[D]]) nothrow : (!cir.ptr<!rec_Derived>) -> ()
+// CIR: %[[VPTR_PTR:.+]] = cir.vtable.get_vptr %[[D]] : !cir.ptr<!rec_Derived> -> !cir.ptr<!cir.vptr>
+// CIR: %[[VPTR:.+]] = cir.load {{.*}} %[[VPTR_PTR]] : !cir.ptr<!cir.vptr>, !cir.vptr
+// CIR: %[[VPTR_I8:.+]] = cir.cast(bitcast, %[[VPTR]] : !cir.vptr), !cir.ptr<!u8i>
+// CIR: %[[NEG32:.+]] = cir.const #cir.int<-32> : !s64i
+// CIR: %[[ADJ_VPTR_I8:.+]] = cir.ptr_stride(%[[VPTR_I8]] : !cir.ptr<!u8i>, %[[NEG32]] : !s64i), !cir.ptr<!u8i>
+// CIR: %[[OFFSET_PTR:.+]] = cir.cast(bitcast, %[[ADJ_VPTR_I8]] : !cir.ptr<!u8i>), !cir.ptr<!s64i>
+// CIR: %[[OFFSET:.+]] = cir.load {{.*}} %[[OFFSET_PTR]] : !cir.ptr<!s64i>, !s64i
+// CIR: %[[D_I8:.+]] = cir.cast(bitcast, %[[D]] : !cir.ptr<!rec_Derived>), !cir.ptr<!u8i>
+// CIR: %[[ADJ_THIS_I8:.+]] = cir.ptr_stride(%[[D_I8]] : !cir.ptr<!u8i>, %[[OFFSET]] : !s64i), !cir.ptr<!u8i>
+// CIR: %[[ADJ_THIS_D:.+]] = cir.cast(bitcast, %[[ADJ_THIS_I8]] : !cir.ptr<!u8i>), !cir.ptr<!rec_Derived>
+// CIR: %[[BASE_THIS:.+]] = cir.cast(bitcast, %[[ADJ_THIS_D]] : !cir.ptr<!rec_Derived>), !cir.ptr<!rec_Base>
+// CIR: %[[BASE_VPTR_PTR:.+]] = cir.vtable.get_vptr %[[BASE_THIS]] : !cir.ptr<!rec_Base> -> !cir.ptr<!cir.vptr>
+// CIR: %[[BASE_VPTR:.+]] = cir.load {{.*}} %[[BASE_VPTR_PTR]] : !cir.ptr<!cir.vptr>, !cir.vptr
+// CIR: %[[SLOT_PTR:.+]] = cir.vtable.get_virtual_fn_addr %[[BASE_VPTR]][0] : !cir.vptr -> !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>>
+// CIR: %[[FN:.+]] = cir.load {{.*}} %[[SLOT_PTR]] : !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>>, !cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>
+// CIR: cir.call %[[FN]](%[[BASE_THIS]]) : (!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>, !cir.ptr<!rec_Base>) -> ()
+// CIR: cir.return
+
+// CIR: cir.func {{.*}}@_Z1gv() {
+// CIR: %[[DF:.+]] = cir.alloca !rec_DerivedFinal, !cir.ptr<!rec_DerivedFinal>, ["df", init]
+// CIR: cir.call @_ZN12DerivedFinalC1Ev(%[[DF]]) nothrow : (!cir.ptr<!rec_DerivedFinal>) -> ()
+// CIR: %[[BASE_THIS_2:.+]] = cir.base_class_addr %[[DF]] : !cir.ptr<!rec_DerivedFinal> nonnull [0] -> !cir.ptr<!rec_Base>
+// CIR: %[[BASE_VPTR_PTR_2:.+]] = cir.vtable.get_vptr %[[BASE_THIS_2]] : !cir.ptr<!rec_Base> -> !cir.ptr<!cir.vptr>
+// CIR: %[[BASE_VPTR_2:.+]] = cir.load {{.*}} %[[BASE_VPTR_PTR_2]] : !cir.ptr<!cir.vptr>, !cir.vptr
+// CIR: %[[SLOT_PTR_2:.+]] = cir.vtable.get_virtual_fn_addr %[[BASE_VPTR_2]][0] : !cir.vptr -> !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>>
+// CIR: %[[FN_2:.+]] = cir.load {{.*}} %[[SLOT_PTR_2]] : !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>>, !cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>
+// CIR: cir.call %[[FN_2]](%[[BASE_THIS_2]]) : (!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>, !cir.ptr<!rec_Base>) -> ()
+// CIR: cir.return
+
+// LLVM: define {{.*}}void @_Z1fv()
+// LLVM: %[[D:.+]] = alloca {{.*}}
+// LLVM: call void @_ZN7DerivedC1Ev(ptr %[[D]])
+// LLVM: %[[VPTR_ADDR:.+]] = load ptr, ptr %[[D]]
+// LLVM: %[[NEG32_PTR:.+]] = getelementptr i8, ptr %[[VPTR_ADDR]], i64 -32
+// LLVM: %[[OFF:.+]] = load i64, ptr %[[NEG32_PTR]]
+// LLVM: %[[ADJ_THIS:.+]] = getelementptr i8, ptr %[[D]], i64 %[[OFF]]
+// LLVM: %[[VFN_TAB:.+]] = load ptr, ptr %[[ADJ_THIS]]
+// LLVM: %[[SLOT0:.+]] = getelementptr inbounds ptr, ptr %[[VFN_TAB]], i32 0
+// LLVM: %[[VFN:.+]] = load ptr, ptr %[[SLOT0]]
+// LLVM: call void %[[VFN]](ptr %[[ADJ_THIS]])
+// LLVM: ret void
+
+// LLVM: define {{.*}}void @_Z1gv()
+// LLVM: %[[DF:.+]] = alloca {{.*}}
+// LLVM: call void @_ZN12DerivedFinalC1Ev(ptr %[[DF]])
+// LLVM: %[[VPTR2:.+]] = load ptr, ptr %[[DF]]
+// LLVM: %[[SLOT0_2:.+]] = getelementptr inbounds ptr, ptr %[[VPTR2]], i32 0
+// LLVM: %[[VFN2:.+]] = load ptr, ptr %[[SLOT0_2]]
+// LLVM: call void %[[VFN2]](ptr %[[DF]])
+// LLVM: ret void
+
+// OGCG: define {{.*}}void @_Z1fv()
+// OGCG: %[[D:.+]] = alloca {{.*}}
+// OGCG: call void @_ZN7DerivedC1Ev(ptr {{.*}} %[[D]])
+// OGCG: %[[VTABLE:.+]] = load ptr, ptr %[[D]]
+// OGCG: %[[NEG32_PTR:.+]] = getelementptr i8, ptr %[[VTABLE]], i64 -32
+// OGCG: %[[OFF:.+]] = load i64, ptr %[[NEG32_PTR]]
+// OGCG: %[[ADJ_THIS:.+]] = getelementptr inbounds i8, ptr %[[D]], i64 %[[OFF]]
+// OGCG: call void @_ZN4Base1fEv(ptr {{.*}} %[[ADJ_THIS]])
+// OGCG: ret void
+
+// OGCG: define {{.*}}void @_Z1gv()
+// OGCG: %[[DF:.+]] = alloca {{.*}}
+// OGCG: call void @_ZN12DerivedFinalC1Ev(ptr {{.*}} %[[DF]])
+// OGCG: call void @_ZN4Base1fEv(ptr {{.*}} %[[DF]])
+// OGCG: ret void
+
// Constructor for B
// CIR: cir.func comdat linkonce_odr @_ZN1BC1Ev(%arg0: !cir.ptr<!rec_B>
// CIR: %[[THIS_ADDR:.*]] = cir.alloca !cir.ptr<!rec_B>, !cir.ptr<!cir.ptr<!rec_B>>, ["this", init]
diff --git a/clang/test/CIR/CodeGen/vector-ext.cpp b/clang/test/CIR/CodeGen/vector-ext.cpp
index 8b5379a..8bca48d 100644
--- a/clang/test/CIR/CodeGen/vector-ext.cpp
+++ b/clang/test/CIR/CodeGen/vector-ext.cpp
@@ -1322,3 +1322,23 @@ void logical_not() {
// OGCG: %[[RESULT:.*]] = icmp eq <4 x i32> %[[TMP_A]], zeroinitializer
// OGCG: %[[RESULT_VI4:.*]] = sext <4 x i1> %[[RESULT]] to <4 x i32>
// OGCG: store <4 x i32> %[[RESULT_VI4]], ptr %[[B_ADDR]], align 16
+
+void unary_extension() {
+ vi4 a;
+ vi4 b = __extension__ a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b", init]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: cir.store{{.*}} %[[TMP_A]], %[[B_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[B_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// LLVM: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], align 16
+
+// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[B_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// OGCG: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], align 16
diff --git a/clang/test/CIR/CodeGen/vector.cpp b/clang/test/CIR/CodeGen/vector.cpp
index d8fdeea..f242779 100644
--- a/clang/test/CIR/CodeGen/vector.cpp
+++ b/clang/test/CIR/CodeGen/vector.cpp
@@ -1390,3 +1390,23 @@ void logical_not_float() {
// OGCG: %[[RESULT:.*]] = fcmp oeq <4 x float> %[[TMP_A]], zeroinitializer
// OGCG: %[[RESULT_VI4:.*]] = sext <4 x i1> %[[RESULT]] to <4 x i32>
// OGCG: store <4 x i32> %[[RESULT_VI4]], ptr %[[B_ADDR]], align 16
+
+void unary_extension() {
+ vi4 a;
+ vi4 b = __extension__ a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b", init]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: cir.store{{.*}} %[[TMP_A]], %[[B_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[B_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// LLVM: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], align 16
+
+// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[B_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// OGCG: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], align 16
diff --git a/clang/test/CIR/IR/global-init.cir b/clang/test/CIR/IR/global-init.cir
new file mode 100644
index 0000000..727c067
--- /dev/null
+++ b/clang/test/CIR/IR/global-init.cir
@@ -0,0 +1,48 @@
+// RUN: cir-opt --verify-roundtrip %s -o - | FileCheck %s
+
+!u8i = !cir.int<u, 8>
+
+!rec_NeedsCtor = !cir.record<struct "NeedsCtor" padded {!u8i}>
+!rec_NeedsDtor = !cir.record<struct "NeedsDtor" padded {!u8i}>
+!rec_NeedsCtorDtor = !cir.record<struct "NeedsCtorDtor" padded {!u8i}>
+
+module attributes {cir.triple = "x86_64-unknown-linux-gnu"} {
+ cir.func private @_ZN9NeedsCtorC1Ev(!cir.ptr<!rec_NeedsCtor>)
+ cir.global external @needsCtor = ctor : !rec_NeedsCtor {
+ %0 = cir.get_global @needsCtor : !cir.ptr<!rec_NeedsCtor>
+ cir.call @_ZN9NeedsCtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtor>) -> ()
+ }
+ // CHECK: cir.global external @needsCtor = ctor : !rec_NeedsCtor {
+ // CHECK: %0 = cir.get_global @needsCtor : !cir.ptr<!rec_NeedsCtor>
+ // CHECK: cir.call @_ZN9NeedsCtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtor>) -> ()
+ // CHECK: }
+
+ cir.func private @_ZN9NeedsDtorD1Ev(!cir.ptr<!rec_NeedsDtor>)
+ cir.global external dso_local @needsDtor = #cir.zero : !rec_NeedsDtor dtor {
+ %0 = cir.get_global @needsDtor : !cir.ptr<!rec_NeedsDtor>
+ cir.call @_ZN9NeedsDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsDtor>) -> ()
+ }
+ // CHECK: cir.global external dso_local @needsDtor = #cir.zero : !rec_NeedsDtor dtor {
+ // CHECK: %0 = cir.get_global @needsDtor : !cir.ptr<!rec_NeedsDtor>
+ // CHECK: cir.call @_ZN9NeedsDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsDtor>) -> ()
+ // CHECK: }
+
+ cir.func private @_ZN13NeedsCtorDtorC1Ev(!cir.ptr<!rec_NeedsCtorDtor>)
+ cir.func private @_ZN13NeedsCtorDtorD1Ev(!cir.ptr<!rec_NeedsCtorDtor>)
+ cir.global external dso_local @needsCtorDtor = ctor : !rec_NeedsCtorDtor {
+ %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor>
+ cir.call @_ZN13NeedsCtorDtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> ()
+ } dtor {
+ %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor>
+ cir.call @_ZN13NeedsCtorDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> ()
+ }
+ // CHECK: cir.func private @_ZN13NeedsCtorDtorC1Ev(!cir.ptr<!rec_NeedsCtorDtor>)
+ // CHECK: cir.func private @_ZN13NeedsCtorDtorD1Ev(!cir.ptr<!rec_NeedsCtorDtor>)
+ // CHECK: cir.global external dso_local @needsCtorDtor = ctor : !rec_NeedsCtorDtor {
+ // CHECK: %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor>
+ // CHECK: cir.call @_ZN13NeedsCtorDtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> ()
+ // CHECK: } dtor {
+ // CHECK: %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor>
+ // CHECK: cir.call @_ZN13NeedsCtorDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> ()
+ // CHECK: }
+}
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 347cd9e..3018bb97 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -985,18 +985,21 @@ double test_mm256_cvtsd_f64(__m256d __a) {
// CHECK: extractelement <4 x double> %{{.*}}, i32 0
return _mm256_cvtsd_f64(__a);
}
+TEST_CONSTEXPR(_mm256_cvtsd_f64((__m256d){8.0, 7.0, 6.0, 5.0}) == 8.0);
int test_mm256_cvtsi256_si32(__m256i __a) {
// CHECK-LABEL: test_mm256_cvtsi256_si32
// CHECK: extractelement <8 x i32> %{{.*}}, i32 0
return _mm256_cvtsi256_si32(__a);
}
+TEST_CONSTEXPR(_mm256_cvtsi256_si32((__m256i)(__v8si){8, 7, 6, 5, 4, 3, 2, 1}) == 8);
float test_mm256_cvtss_f32(__m256 __a) {
// CHECK-LABEL: test_mm256_cvtss_f32
// CHECK: extractelement <8 x float> %{{.*}}, i32 0
return _mm256_cvtss_f32(__a);
}
+TEST_CONSTEXPR(_mm256_cvtss_f32((__m256){8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f}) == 8.0f);
__m128i test_mm256_cvttpd_epi32(__m256d A) {
// CHECK-LABEL: test_mm256_cvttpd_epi32
diff --git a/clang/test/CodeGen/X86/bmi-builtins.c b/clang/test/CodeGen/X86/bmi-builtins.c
index ded40ca..d0ae0c7 100644
--- a/clang/test/CodeGen/X86/bmi-builtins.c
+++ b/clang/test/CodeGen/X86/bmi-builtins.c
@@ -1,7 +1,16 @@
-// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,TZCNT
-// RUN: %clang_cc1 -x c -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT | FileCheck %s --check-prefix=TZCNT
-// RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,TZCNT
-// RUN: %clang_cc1 -x c++ -std=c++11 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT | FileCheck %s --check-prefix=TZCNT
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64,TZCNT,TZCNT64
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,TZCNT
+// RUN: %clang_cc1 -x c -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT | FileCheck %s --check-prefixes=TZCNT,TZCNT64
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64,TZCNT,TZCNT64
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,TZCNT
+// RUN: %clang_cc1 -x c++ -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT | FileCheck %s --check-prefixes=TZCNT,TZCNT64
+
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64,TZCNT,TZCNT64
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,TZCNT
+// RUN: %clang_cc1 -x c -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=TZCNT,TZCNT64
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64,TZCNT,TZCNT64
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,TZCNT
+// RUN: %clang_cc1 -x c++ -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=TZCNT,TZCNT64
#include <immintrin.h>
@@ -48,20 +57,20 @@ unsigned int test_tzcnt_u32(unsigned int __X) {
#ifdef __x86_64__
unsigned long long test__tzcnt_u64(unsigned long long __X) {
-// TZCNT-LABEL: test__tzcnt_u64
-// TZCNT: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false)
+// TZCNT64-LABEL: test__tzcnt_u64
+// TZCNT64: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false)
return __tzcnt_u64(__X);
}
long long test_mm_tzcnt_64(unsigned long long __X) {
-// TZCNT-LABEL: test_mm_tzcnt_64
-// TZCNT: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false)
+// TZCNT64-LABEL: test_mm_tzcnt_64
+// TZCNT64: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false)
return _mm_tzcnt_64(__X);
}
unsigned long long test_tzcnt_u64(unsigned long long __X) {
-// TZCNT-LABEL: test_tzcnt_u64
-// TZCNT: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false)
+// TZCNT64-LABEL: test_tzcnt_u64
+// TZCNT64: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false)
return _tzcnt_u64(__X);
}
#endif
@@ -103,36 +112,36 @@ unsigned int test__blsr_u32(unsigned int __X) {
#ifdef __x86_64__
unsigned long long test__andn_u64(unsigned long __X, unsigned long __Y) {
-// CHECK-LABEL: test__andn_u64
-// CHECK: xor i64 %{{.*}}, -1
-// CHECK: and i64 %{{.*}}, %{{.*}}
+// X64-LABEL: test__andn_u64
+// X64: xor i64 %{{.*}}, -1
+// X64: and i64 %{{.*}}, %{{.*}}
return __andn_u64(__X, __Y);
}
unsigned long long test__bextr_u64(unsigned long __X, unsigned long __Y) {
-// CHECK-LABEL: test__bextr_u64
-// CHECK: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}})
+// X64-LABEL: test__bextr_u64
+// X64: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}})
return __bextr_u64(__X, __Y);
}
unsigned long long test__blsi_u64(unsigned long long __X) {
-// CHECK-LABEL: test__blsi_u64
-// CHECK: sub i64 0, %{{.*}}
-// CHECK: and i64 %{{.*}}, %{{.*}}
+// X64-LABEL: test__blsi_u64
+// X64: sub i64 0, %{{.*}}
+// X64: and i64 %{{.*}}, %{{.*}}
return __blsi_u64(__X);
}
unsigned long long test__blsmsk_u64(unsigned long long __X) {
-// CHECK-LABEL: test__blsmsk_u64
-// CHECK: sub i64 %{{.*}}, 1
-// CHECK: xor i64 %{{.*}}, %{{.*}}
+// X64-LABEL: test__blsmsk_u64
+// X64: sub i64 %{{.*}}, 1
+// X64: xor i64 %{{.*}}, %{{.*}}
return __blsmsk_u64(__X);
}
unsigned long long test__blsr_u64(unsigned long long __X) {
-// CHECK-LABEL: test__blsr_u64
-// CHECK: sub i64 %{{.*}}, 1
-// CHECK: and i64 %{{.*}}, %{{.*}}
+// X64-LABEL: test__blsr_u64
+// X64: sub i64 %{{.*}}, 1
+// X64: and i64 %{{.*}}, %{{.*}}
return __blsr_u64(__X);
}
#endif
@@ -186,49 +195,49 @@ unsigned int test_blsr_u32(unsigned int __X) {
#ifdef __x86_64__
unsigned long long test_andn_u64(unsigned long __X, unsigned long __Y) {
-// CHECK-LABEL: test_andn_u64
-// CHECK: xor i64 %{{.*}}, -1
-// CHECK: and i64 %{{.*}}, %{{.*}}
+// X64-LABEL: test_andn_u64
+// X64: xor i64 %{{.*}}, -1
+// X64: and i64 %{{.*}}, %{{.*}}
return _andn_u64(__X, __Y);
}
unsigned long long test_bextr_u64(unsigned long __X, unsigned int __Y,
unsigned int __Z) {
-// CHECK-LABEL: test_bextr_u64
-// CHECK: and i32 %{{.*}}, 255
-// CHECK: and i32 %{{.*}}, 255
-// CHECK: shl i32 %{{.*}}, 8
-// CHECK: or i32 %{{.*}}, %{{.*}}
-// CHECK: zext i32 %{{.*}} to i64
-// CHECK: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}})
+// X64-LABEL: test_bextr_u64
+// X64: and i32 %{{.*}}, 255
+// X64: and i32 %{{.*}}, 255
+// X64: shl i32 %{{.*}}, 8
+// X64: or i32 %{{.*}}, %{{.*}}
+// X64: zext i32 %{{.*}} to i64
+// X64: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}})
return _bextr_u64(__X, __Y, __Z);
}
unsigned long long test_bextr2_u64(unsigned long long __X,
unsigned long long __Y) {
-// CHECK-LABEL: test_bextr2_u64
-// CHECK: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}})
+// X64-LABEL: test_bextr2_u64
+// X64: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}})
return _bextr2_u64(__X, __Y);
}
unsigned long long test_blsi_u64(unsigned long long __X) {
-// CHECK-LABEL: test_blsi_u64
-// CHECK: sub i64 0, %{{.*}}
-// CHECK: and i64 %{{.*}}, %{{.*}}
+// X64-LABEL: test_blsi_u64
+// X64: sub i64 0, %{{.*}}
+// X64: and i64 %{{.*}}, %{{.*}}
return _blsi_u64(__X);
}
unsigned long long test_blsmsk_u64(unsigned long long __X) {
-// CHECK-LABEL: test_blsmsk_u64
-// CHECK: sub i64 %{{.*}}, 1
-// CHECK: xor i64 %{{.*}}, %{{.*}}
+// X64-LABEL: test_blsmsk_u64
+// X64: sub i64 %{{.*}}, 1
+// X64: xor i64 %{{.*}}, %{{.*}}
return _blsmsk_u64(__X);
}
unsigned long long test_blsr_u64(unsigned long long __X) {
-// CHECK-LABEL: test_blsr_u64
-// CHECK: sub i64 %{{.*}}, 1
-// CHECK: and i64 %{{.*}}, %{{.*}}
+// X64-LABEL: test_blsr_u64
+// X64: sub i64 %{{.*}}, 1
+// X64: and i64 %{{.*}}, %{{.*}}
return _blsr_u64(__X);
}
#endif
diff --git a/clang/test/CodeGen/X86/bmi2-builtins.c b/clang/test/CodeGen/X86/bmi2-builtins.c
index 48424f5..1b2cb90 100644
--- a/clang/test/CodeGen/X86/bmi2-builtins.c
+++ b/clang/test/CodeGen/X86/bmi2-builtins.c
@@ -3,6 +3,11 @@
// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi2 -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi2 -emit-llvm -o - | FileCheck %s --check-prefix=B32
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi2 -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi2 -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefix=B32
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi2 -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi2 -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefix=B32
+
#include <immintrin.h>
diff --git a/clang/test/CodeGen/X86/tbm-builtins.c b/clang/test/CodeGen/X86/tbm-builtins.c
index d916627..89746bf 100644
--- a/clang/test/CodeGen/X86/tbm-builtins.c
+++ b/clang/test/CodeGen/X86/tbm-builtins.c
@@ -1,5 +1,12 @@
-// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X64
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X64
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK
+
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-unknown-unknown -target-feature +tbm -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-unknown-unknown -target-feature +tbm -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
#include <x86intrin.h>
@@ -13,14 +20,14 @@ unsigned int test__bextri_u32(unsigned int a) {
#ifdef __x86_64__
unsigned long long test__bextri_u64(unsigned long long a) {
- // CHECK-LABEL: test__bextri_u64
- // CHECK: call i64 @llvm.x86.tbm.bextri.u64(i64 %{{.*}}, i64 2)
+ // X64-LABEL: test__bextri_u64
+ // X64: call i64 @llvm.x86.tbm.bextri.u64(i64 %{{.*}}, i64 2)
return __bextri_u64(a, 2);
}
unsigned long long test__bextri_u64_bigint(unsigned long long a) {
- // CHECK-LABEL: test__bextri_u64_bigint
- // CHECK: call i64 @llvm.x86.tbm.bextri.u64(i64 %{{.*}}, i64 549755813887)
+ // X64-LABEL: test__bextri_u64_bigint
+ // X64: call i64 @llvm.x86.tbm.bextri.u64(i64 %{{.*}}, i64 549755813887)
return __bextri_u64(a, 0x7fffffffffLL);
}
#endif
@@ -34,9 +41,9 @@ unsigned int test__blcfill_u32(unsigned int a) {
#ifdef __x86_64__
unsigned long long test__blcfill_u64(unsigned long long a) {
- // CHECK-LABEL: test__blcfill_u64
- // CHECK: [[TMP:%.*]] = add i64 %{{.*}}, 1
- // CHECK: %{{.*}} = and i64 %{{.*}}, [[TMP]]
+ // X64-LABEL: test__blcfill_u64
+ // X64: [[TMP:%.*]] = add i64 %{{.*}}, 1
+ // X64: %{{.*}} = and i64 %{{.*}}, [[TMP]]
return __blcfill_u64(a);
}
#endif
@@ -51,10 +58,10 @@ unsigned int test__blci_u32(unsigned int a) {
#ifdef __x86_64__
unsigned long long test__blci_u64(unsigned long long a) {
- // CHECK-LABEL: test__blci_u64
- // CHECK: [[TMP1:%.*]] = add i64 %{{.*}}, 1
- // CHECK: [[TMP2:%.*]] = xor i64 [[TMP1]], -1
- // CHECK: %{{.*}} = or i64 %{{.*}}, [[TMP2]]
+ // X64-LABEL: test__blci_u64
+ // X64: [[TMP1:%.*]] = add i64 %{{.*}}, 1
+ // X64: [[TMP2:%.*]] = xor i64 [[TMP1]], -1
+ // X64: %{{.*}} = or i64 %{{.*}}, [[TMP2]]
return __blci_u64(a);
}
#endif
@@ -69,10 +76,10 @@ unsigned int test__blcic_u32(unsigned int a) {
#ifdef __x86_64__
unsigned long long test__blcic_u64(unsigned long long a) {
- // CHECK-LABEL: test__blcic_u64
- // CHECK: [[TMP1:%.*]] = xor i64 %{{.*}}, -1
- // CHECK: [[TMP2:%.*]] = add i64 %{{.*}}, 1
- // CHECK-NEXT: {{.*}} = and i64 [[TMP1]], [[TMP2]]
+ // X64-LABEL: test__blcic_u64
+ // X64: [[TMP1:%.*]] = xor i64 %{{.*}}, -1
+ // X64: [[TMP2:%.*]] = add i64 %{{.*}}, 1
+ // X64-NEXT: {{.*}} = and i64 [[TMP1]], [[TMP2]]
return __blcic_u64(a);
}
#endif
@@ -86,9 +93,9 @@ unsigned int test__blcmsk_u32(unsigned int a) {
#ifdef __x86_64__
unsigned long long test__blcmsk_u64(unsigned long long a) {
- // CHECK-LABEL: test__blcmsk_u64
- // CHECK: [[TMP:%.*]] = add i64 %{{.*}}, 1
- // CHECK-NEXT: {{.*}} = xor i64 %{{.*}}, [[TMP]]
+ // X64-LABEL: test__blcmsk_u64
+ // X64: [[TMP:%.*]] = add i64 %{{.*}}, 1
+ // X64-NEXT: {{.*}} = xor i64 %{{.*}}, [[TMP]]
return __blcmsk_u64(a);
}
#endif
@@ -102,9 +109,9 @@ unsigned int test__blcs_u32(unsigned int a) {
#ifdef __x86_64__
unsigned long long test__blcs_u64(unsigned long long a) {
- // CHECK-LABEL: test__blcs_u64
- // CHECK: [[TMP:%.*]] = add i64 %{{.*}}, 1
- // CHECK-NEXT: {{.*}} = or i64 %{{.*}}, [[TMP]]
+ // X64-LABEL: test__blcs_u64
+ // X64: [[TMP:%.*]] = add i64 %{{.*}}, 1
+ // X64-NEXT: {{.*}} = or i64 %{{.*}}, [[TMP]]
return __blcs_u64(a);
}
#endif
@@ -118,9 +125,9 @@ unsigned int test__blsfill_u32(unsigned int a) {
#ifdef __x86_64__
unsigned long long test__blsfill_u64(unsigned long long a) {
- // CHECK-LABEL: test__blsfill_u64
- // CHECK: [[TMP:%.*]] = sub i64 %{{.*}}, 1
- // CHECK-NEXT: {{.*}} = or i64 %{{.*}}, [[TMP]]
+ // X64-LABEL: test__blsfill_u64
+ // X64: [[TMP:%.*]] = sub i64 %{{.*}}, 1
+ // X64-NEXT: {{.*}} = or i64 %{{.*}}, [[TMP]]
return __blsfill_u64(a);
}
#endif
@@ -135,10 +142,10 @@ unsigned int test__blsic_u32(unsigned int a) {
#ifdef __x86_64__
unsigned long long test__blsic_u64(unsigned long long a) {
- // CHECK-LABEL: test__blsic_u64
- // CHECK: [[TMP1:%.*]] = xor i64 %{{.*}}, -1
- // CHECK: [[TMP2:%.*]] = sub i64 %{{.*}}, 1
- // CHECK-NEXT: {{.*}} = or i64 [[TMP1]], [[TMP2]]
+ // X64-LABEL: test__blsic_u64
+ // X64: [[TMP1:%.*]] = xor i64 %{{.*}}, -1
+ // X64: [[TMP2:%.*]] = sub i64 %{{.*}}, 1
+ // X64-NEXT: {{.*}} = or i64 [[TMP1]], [[TMP2]]
return __blsic_u64(a);
}
#endif
@@ -153,10 +160,10 @@ unsigned int test__t1mskc_u32(unsigned int a) {
#ifdef __x86_64__
unsigned long long test__t1mskc_u64(unsigned long long a) {
- // CHECK-LABEL: test__t1mskc_u64
- // CHECK: [[TMP1:%.*]] = xor i64 %{{.*}}, -1
- // CHECK: [[TMP2:%.*]] = add i64 %{{.*}}, 1
- // CHECK-NEXT: {{.*}} = or i64 [[TMP1]], [[TMP2]]
+ // X64-LABEL: test__t1mskc_u64
+ // X64: [[TMP1:%.*]] = xor i64 %{{.*}}, -1
+ // X64: [[TMP2:%.*]] = add i64 %{{.*}}, 1
+ // X64-NEXT: {{.*}} = or i64 [[TMP1]], [[TMP2]]
return __t1mskc_u64(a);
}
#endif
@@ -171,10 +178,10 @@ unsigned int test__tzmsk_u32(unsigned int a) {
#ifdef __x86_64__
unsigned long long test__tzmsk_u64(unsigned long long a) {
- // CHECK-LABEL: test__tzmsk_u64
- // CHECK: [[TMP1:%.*]] = xor i64 %{{.*}}, -1
- // CHECK: [[TMP2:%.*]] = sub i64 %{{.*}}, 1
- // CHECK-NEXT: {{.*}} = and i64 [[TMP1]], [[TMP2]]
+ // X64-LABEL: test__tzmsk_u64
+ // X64: [[TMP1:%.*]] = xor i64 %{{.*}}, -1
+ // X64: [[TMP2:%.*]] = sub i64 %{{.*}}, 1
+ // X64-NEXT: {{.*}} = and i64 [[TMP1]], [[TMP2]]
return __tzmsk_u64(a);
}
#endif
diff --git a/clang/test/CodeGen/amdgpu-image-rsrc-type-debug-info.c b/clang/test/CodeGen/amdgpu-image-rsrc-type-debug-info.c
new file mode 100644
index 0000000..ef68c79
--- /dev/null
+++ b/clang/test/CodeGen/amdgpu-image-rsrc-type-debug-info.c
@@ -0,0 +1,17 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn -emit-llvm -o - %s -debug-info-kind=limited | FileCheck %s
+
+// CHECK-LABEL: define dso_local void @test_locals(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] !dbg [[DBG6:![0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[IMG:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[IMG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IMG]] to ptr
+// CHECK-NEXT: #dbg_declare(ptr addrspace(5) [[IMG]], [[META11:![0-9]+]], !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), [[META14:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[IMG_ASCAST]], align 32, !dbg [[DBG15:![0-9]+]]
+// CHECK-NEXT: ret void, !dbg [[DBG16:![0-9]+]]
+//
+void test_locals(void) {
+ __amdgpu_texture_t img;
+ (void)img;
+}
diff --git a/clang/test/CodeGenCXX/amdgpu-image-rsrc-typeinfo.cpp b/clang/test/CodeGenCXX/amdgpu-image-rsrc-typeinfo.cpp
new file mode 100644
index 0000000..0dbd517
--- /dev/null
+++ b/clang/test/CodeGenCXX/amdgpu-image-rsrc-typeinfo.cpp
@@ -0,0 +1,7 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn %s -emit-llvm -o - | FileCheck %s
+namespace std { class type_info; }
+auto &a = typeid(__amdgpu_texture_t);
+//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+// CHECK: {{.*}}
diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp
index 5920ced..137a49b 100644
--- a/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp
+++ b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp
@@ -1,7 +1,10 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: amdgpu-registered-target
+// REQUIRES: spirv-registered-target
// RUN: %clang_cc1 %s -x hip -fcuda-is-device -emit-llvm -O0 -o - \
-// RUN: -triple=amdgcn-amd-amdhsa | FileCheck %s
+// RUN: -triple=amdgcn-amd-amdhsa | FileCheck --check-prefix=GCN %s
+// RUN: %clang_cc1 %s -x hip -fcuda-is-device -emit-llvm -O0 -o - \
+// RUN: -triple=spirv64-amd-amdhsa | FileCheck --check-prefix=AMDGCNSPIRV %s
// CHECK-LABEL: @_Z29test_non_volatile_parameter32Pj(
// CHECK-NEXT: entry:
@@ -21,6 +24,43 @@
// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z29test_non_volatile_parameter32Pj(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// GCN-NEXT: [[RES:%.*]] = alloca i32, align 4, addrspace(5)
+// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr
+// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4:![0-9]+]]
+// GCN-NEXT: store i32 [[TMP3]], ptr [[RES_ASCAST]], align 4
+// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z29test_non_volatile_parameter32Pj(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8
+// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i32, align 4
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5:![0-9]+]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[TMP5]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_non_volatile_parameter32(__UINT32_TYPE__ *ptr) {
__UINT32_TYPE__ res;
@@ -47,6 +87,43 @@ __attribute__((device)) void test_non_volatile_parameter32(__UINT32_TYPE__ *ptr)
// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z29test_non_volatile_parameter64Py(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// GCN-NEXT: [[RES:%.*]] = alloca i64, align 8, addrspace(5)
+// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr
+// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr [[RES_ASCAST]], align 8
+// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z29test_non_volatile_parameter64Py(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8
+// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i64, align 8
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) [[TMP1]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i64, ptr addrspace(4) [[TMP5]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_non_volatile_parameter64(__UINT64_TYPE__ *ptr) {
__UINT64_TYPE__ res;
@@ -73,6 +150,43 @@ __attribute__((device)) void test_non_volatile_parameter64(__UINT64_TYPE__ *ptr)
// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z25test_volatile_parameter32PVj(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// GCN-NEXT: [[RES:%.*]] = alloca i32, align 4, addrspace(5)
+// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr
+// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP2:%.*]] = load volatile i32, ptr [[TMP1]], align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP3]], ptr [[RES_ASCAST]], align 4
+// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP6:%.*]] = load volatile i32, ptr [[TMP5]], align 4
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z25test_volatile_parameter32PVj(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8
+// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i32, align 4
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load volatile i32, ptr addrspace(4) [[TMP1]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr addrspace(4) [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load volatile i32, ptr addrspace(4) [[TMP5]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr addrspace(4) [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_volatile_parameter32(volatile __UINT32_TYPE__ *ptr) {
__UINT32_TYPE__ res;
@@ -99,6 +213,43 @@ __attribute__((device)) void test_volatile_parameter32(volatile __UINT32_TYPE__
// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z25test_volatile_parameter64PVy(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// GCN-NEXT: [[RES:%.*]] = alloca i64, align 8, addrspace(5)
+// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr
+// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP2:%.*]] = load volatile i64, ptr [[TMP1]], align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr [[RES_ASCAST]], align 8
+// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP6:%.*]] = load volatile i64, ptr [[TMP5]], align 8
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z25test_volatile_parameter64PVy(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8
+// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i64, align 8
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load volatile i64, ptr addrspace(4) [[TMP1]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr addrspace(4) [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load volatile i64, ptr addrspace(4) [[TMP5]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr addrspace(4) [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_volatile_parameter64(volatile __UINT64_TYPE__ *ptr) {
__UINT64_TYPE__ res;
@@ -116,6 +267,25 @@ __attribute__((device)) void test_volatile_parameter64(volatile __UINT64_TYPE__
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z13test_shared32v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z13test_shared32v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_shared32() {
__attribute__((shared)) __UINT32_TYPE__ val;
@@ -134,6 +304,25 @@ __attribute__((device)) void test_shared32() {
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z13test_shared64v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z13test_shared64v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_shared64() {
__attribute__((shared)) __UINT64_TYPE__ val;
@@ -153,6 +342,25 @@ __attribute__((device)) __UINT32_TYPE__ global_val32;
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z13test_global32v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4
+// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z13test_global32v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_global32() {
global_val32 = __builtin_amdgcn_atomic_inc32(&global_val32, global_val32, __ATOMIC_SEQ_CST, "workgroup");
@@ -170,6 +378,25 @@ __attribute__((device)) __UINT64_TYPE__ global_val64;
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z13test_global64v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z13test_global64v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_global64() {
global_val64 = __builtin_amdgcn_atomic_inc64(&global_val64, global_val64, __ATOMIC_SEQ_CST, "workgroup");
@@ -189,6 +416,29 @@ __attribute__((constant)) __UINT32_TYPE__ cval32;
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z15test_constant32v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[LOCAL_VAL:%.*]] = alloca i32, align 4, addrspace(5)
+// GCN-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LOCAL_VAL]] to ptr
+// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), align 4
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP1]], ptr [[LOCAL_VAL_ASCAST]], align 4
+// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z15test_constant32v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[LOCAL_VAL:%.*]] = alloca i32, align 4
+// AMDGCNSPIRV-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr [[LOCAL_VAL]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_constant32() {
__UINT32_TYPE__ local_val;
@@ -210,6 +460,29 @@ __attribute__((constant)) __UINT64_TYPE__ cval64;
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z15test_constant64v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[LOCAL_VAL:%.*]] = alloca i64, align 8, addrspace(5)
+// GCN-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LOCAL_VAL]] to ptr
+// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), align 8
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP1]], ptr [[LOCAL_VAL_ASCAST]], align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z15test_constant64v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[LOCAL_VAL:%.*]] = alloca i64, align 8
+// AMDGCNSPIRV-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr [[LOCAL_VAL]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_constant64() {
__UINT64_TYPE__ local_val;
@@ -240,6 +513,49 @@ __attribute__((device)) void test_constant64() {
// CHECK-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP10]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z12test_order32v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP0]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP4:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP4]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP6:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP6]] syncscope("workgroup") release, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP8:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP8]] syncscope("workgroup") acq_rel, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP9]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP10:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP10]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z12test_order32v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP4]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP6]] syncscope("workgroup") release, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP8]] syncscope("workgroup") acq_rel, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP9]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP10]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP11]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_order32() {
__attribute__((shared)) __UINT32_TYPE__ val;
@@ -278,6 +594,49 @@ __attribute__((device)) void test_order32() {
// CHECK-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP10]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z12test_order64v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP0]] syncscope("workgroup") monotonic, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP4:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP4]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP6:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP6]] syncscope("workgroup") release, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP8:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP8]] syncscope("workgroup") acq_rel, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP9]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP10:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP10]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z12test_order64v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") monotonic, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP4]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP6]] syncscope("workgroup") release, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP8]] syncscope("workgroup") acq_rel, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP9]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP10]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP11]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_order64() {
__attribute__((shared)) __UINT64_TYPE__ val;
@@ -310,6 +669,37 @@ __attribute__((device)) void test_order64() {
// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP6]] syncscope("wavefront") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z12test_scope32v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP0]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP4:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP4]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP6:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP6]] syncscope("wavefront") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z12test_scope32v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP0]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP4]] syncscope("device") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP6]] syncscope("subgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_scope32() {
__attribute__((shared)) __UINT32_TYPE__ val;
@@ -338,6 +728,37 @@ __attribute__((device)) void test_scope32() {
// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP6]] syncscope("wavefront") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z12test_scope64v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP0]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP4:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP4]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP6:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP6]] syncscope("wavefront") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z12test_scope64v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP0]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP4]] syncscope("device") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP6]] syncscope("subgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_scope64() {
__attribute__((shared)) __UINT64_TYPE__ val;
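The checks above exercise the same pair of builtins across address spaces, widths, orderings, and scopes; a minimal sketch of that source pattern is shown below (the function name is illustrative, not taken from the test). The AMDGCNSPIRV lines also document that the AMDGPU scope strings "agent" and "wavefront" are emitted as the SPIR-V scopes "device" and "subgroup" when targeting spirv64-amd-amdhsa.

// Illustrative sketch only: each builtin takes a pointer, a value, a memory
// order, and a sync-scope string, and returns the previous value.
__attribute__((device)) void example_inc_dec(__UINT32_TYPE__ *p) {
  __UINT32_TYPE__ old;
  // amdgcn:             atomicrmw uinc_wrap ... syncscope("agent") seq_cst
  // spirv64-amd-amdhsa: atomicrmw uinc_wrap ... syncscope("device") seq_cst
  old = __builtin_amdgcn_atomic_inc32(p, *p, __ATOMIC_SEQ_CST, "agent");
  // amdgcn:             atomicrmw udec_wrap ... syncscope("wavefront") seq_cst
  // spirv64-amd-amdhsa: atomicrmw udec_wrap ... syncscope("subgroup") seq_cst
  old = __builtin_amdgcn_atomic_dec32(p, *p, __ATOMIC_SEQ_CST, "wavefront");
  (void)old;
}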
diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
index 1e977dd..dd1ca45 100644
--- a/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
+++ b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
@@ -1,7 +1,10 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
// REQUIRES: amdgpu-registered-target
+// REQUIRES: spirv-registered-target
// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
-// RUN: -triple=amdgcn-amd-amdhsa | FileCheck %s
+// RUN: -triple=amdgcn-amd-amdhsa | FileCheck --check-prefix=GCN %s
+// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
+// RUN: -triple=spirv64-amd-amdhsa | FileCheck --check-prefix=AMDGCNSPIRV %s
// CHECK-LABEL: define dso_local void @_Z25test_memory_fence_successv(
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
@@ -12,6 +15,25 @@
// CHECK-NEXT: fence syncscope("agent") acq_rel
// CHECK-NEXT: fence syncscope("workgroup") release
// CHECK-NEXT: ret void
+// GCN-LABEL: define dso_local void @_Z25test_memory_fence_successv(
+// GCN-SAME: ) #[[ATTR0:[0-9]+]] {
+// GCN-NEXT: entry:
+// GCN-NEXT: fence syncscope("workgroup") seq_cst
+// GCN-NEXT: fence syncscope("agent") acquire
+// GCN-NEXT: fence seq_cst
+// GCN-NEXT: fence syncscope("agent") acq_rel
+// GCN-NEXT: fence syncscope("workgroup") release
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z25test_memory_fence_successv(
+// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0:[0-9]+]] {
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire
+// AMDGCNSPIRV-NEXT: fence seq_cst
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release
+// AMDGCNSPIRV-NEXT: ret void
//
void test_memory_fence_success() {
@@ -35,6 +57,25 @@ void test_memory_fence_success() {
// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]]
// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
// CHECK-NEXT: ret void
+// GCN-LABEL: define dso_local void @_Z10test_localv(
+// GCN-SAME: ) #[[ATTR0]] {
+// GCN-NEXT: entry:
+// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3:![0-9]+]]
+// GCN-NEXT: fence syncscope("agent") acquire, !mmra [[META3]]
+// GCN-NEXT: fence seq_cst, !mmra [[META3]]
+// GCN-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]]
+// GCN-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z10test_localv(
+// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] {
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3:![0-9]+]]
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence seq_cst, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: ret void
//
void test_local() {
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local");
@@ -58,6 +99,25 @@ void test_local() {
// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META4]]
// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META4]]
// CHECK-NEXT: ret void
+// GCN-LABEL: define dso_local void @_Z11test_globalv(
+// GCN-SAME: ) #[[ATTR0]] {
+// GCN-NEXT: entry:
+// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META4:![0-9]+]]
+// GCN-NEXT: fence syncscope("agent") acquire, !mmra [[META4]]
+// GCN-NEXT: fence seq_cst, !mmra [[META4]]
+// GCN-NEXT: fence syncscope("agent") acq_rel, !mmra [[META4]]
+// GCN-NEXT: fence syncscope("workgroup") release, !mmra [[META4]]
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z11test_globalv(
+// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] {
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META4:![0-9]+]]
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire, !mmra [[META4]]
+// AMDGCNSPIRV-NEXT: fence seq_cst, !mmra [[META4]]
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel, !mmra [[META4]]
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release, !mmra [[META4]]
+// AMDGCNSPIRV-NEXT: ret void
//
void test_global() {
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "global");
@@ -80,6 +140,25 @@ void test_global() {
// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]]
// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
// CHECK-NEXT: ret void
+// GCN-LABEL: define dso_local void @_Z10test_imagev(
+// GCN-SAME: ) #[[ATTR0]] {
+// GCN-NEXT: entry:
+// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3]]
+// GCN-NEXT: fence syncscope("agent") acquire, !mmra [[META3]]
+// GCN-NEXT: fence seq_cst, !mmra [[META3]]
+// GCN-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]]
+// GCN-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z10test_imagev(
+// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] {
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence seq_cst, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: ret void
//
void test_image() {
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local");
@@ -99,13 +178,33 @@ void test_image() {
// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]]
// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]]
// CHECK-NEXT: ret void
+// GCN-LABEL: define dso_local void @_Z10test_mixedv(
+// GCN-SAME: ) #[[ATTR0]] {
+// GCN-NEXT: entry:
+// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]]
+// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]]
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z10test_mixedv(
+// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] {
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]]
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]]
+// AMDGCNSPIRV-NEXT: ret void
//
void test_mixed() {
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local", "global");
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local", "local", "global", "local", "local");
}
-//.
// CHECK: [[META3]] = !{!"amdgpu-synchronize-as", !"local"}
// CHECK: [[META4]] = !{!"amdgpu-synchronize-as", !"global"}
// CHECK: [[META5]] = !{[[META4]], [[META3]]}
//.
+// GCN: [[META3]] = !{!"amdgpu-synchronize-as", !"local"}
+// GCN: [[META4]] = !{!"amdgpu-synchronize-as", !"global"}
+// GCN: [[META5]] = !{[[META4]], [[META3]]}
+//.
+// AMDGCNSPIRV: [[META3]] = !{!"amdgpu-synchronize-as", !"local"}
+// AMDGCNSPIRV: [[META4]] = !{!"amdgpu-synchronize-as", !"global"}
+// AMDGCNSPIRV: [[META5]] = !{[[META4]], [[META3]]}
+//.
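The fence test above follows the same scope renaming ("agent" becomes "device" on spirv64-amd-amdhsa), and the extra string arguments after the scope are what produce the !mmra "amdgpu-synchronize-as" metadata listed at the end of the file. A hedged sketch of the builtin's use (illustrative, not part of the test):

// Illustrative only: the first argument is the memory order, the second the
// sync scope; any further strings name the address spaces being synchronized.
void example_fences() {
  __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");                        // no !mmra
  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local");           // !mmra "local"
  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local", "global"); // !mmra {"global", "local"}
}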
diff --git a/clang/test/CodeGenCXX/gh56652.cpp b/clang/test/CodeGenCXX/gh56652.cpp
new file mode 100644
index 0000000..06a496e
--- /dev/null
+++ b/clang/test/CodeGenCXX/gh56652.cpp
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-elf-gnu %s -emit-llvm -o - | FileCheck %s
+
+namespace GH56652{
+
+struct foo {};
+
+template <typename T> struct bar {
+ using type = T;
+
+ template <foo> inline static constexpr auto b = true;
+};
+
+template <typename T>
+concept C = requires(T a) { T::template b<foo{}>; };
+
+template <typename T> auto fn(T) {
+ if constexpr (!C<T>)
+ return foo{};
+ else
+ return T{};
+}
+
+auto a = decltype(fn(bar<int>{})){};
+
+}
+
+namespace GH116319 {
+
+template <int = 0> struct a {
+template <class> static constexpr auto b = 2;
+template <class> static void c() noexcept(noexcept(b<int>)) {}
+};
+
+void test() { a<>::c<int>(); }
+
+
+}
+
+// CHECK: %"struct.GH56652::bar" = type { i8 }
+// CHECK: $_ZN8GH1163191aILi0EE1cIiEEvv = comdat any
+// CHECK: @_ZN7GH566521aE = global %"struct.GH56652::bar" undef
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
index 19ab656..7cd3f14 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
@@ -1,13 +1,13 @@
// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1101 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1102 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1103 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1150 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1151 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1153 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1101 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1102 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1103 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1150 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1151 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1153 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,AMDGCNSPIRV %s
typedef unsigned int uint;
typedef unsigned long ulong;
@@ -50,7 +50,8 @@ void test_s_wait_event_export_ready() {
}
// CHECK-LABEL: @test_global_add_f32
-// CHECK: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
+// GCN: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
+// AMDGCNSPIRV: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("device") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
#if !defined(__SPIRV__)
void test_global_add_f32(float *rtn, global float *addr, float x) {
#else
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
index 5f202ba..6bb20bf 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -1,9 +1,9 @@
// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,AMDGCNSPIRV %s
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
@@ -252,9 +252,11 @@ void test_update_dpp_const_int(global int* out, int arg1)
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
-// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
+// GCN: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
+// AMDGCNSPIRV: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("device") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
-// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
+// GCN: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
+// AMDGCNSPIRV: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("subgroup") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
#if !defined(__SPIRV__)
@@ -293,9 +295,11 @@ void test_ds_faddf(local float *out, float src) {
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
-// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
+// GCN: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
+// AMDGCNSPIRV: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("device") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
-// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
+// GCN: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
+// AMDGCNSPIRV: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("subgroup") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
@@ -334,9 +338,11 @@ void test_ds_fminf(__attribute__((address_space(3))) float *out, float src) {
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
-// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
+// GCN: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
+// AMDGCNSPIRV: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("device") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
-// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
+// GCN: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
+// AMDGCNSPIRV: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("subgroup") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index 039d032..ab0b0b9 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -1231,7 +1231,8 @@ void test_atomic_inc_dec(__attribute__((address_space(3))) uint *lptr, __attribu
// CHECK: atomicrmw udec_wrap ptr addrspace(3) %lptr, i32 %val syncscope("workgroup") seq_cst, align 4
res = __builtin_amdgcn_atomic_dec32(lptr, val, __ATOMIC_SEQ_CST, "workgroup");
- // CHECK: atomicrmw uinc_wrap ptr addrspace(1) %gptr, i32 %val syncscope("agent") seq_cst, align 4
+ // CHECK-AMDGCN: atomicrmw uinc_wrap ptr addrspace(1) %gptr, i32 %val syncscope("agent") seq_cst, align 4
+ // CHECK-SPIRV: atomicrmw uinc_wrap ptr addrspace(1) %gptr, i32 %val syncscope("device") seq_cst, align 4
res = __builtin_amdgcn_atomic_inc32(gptr, val, __ATOMIC_SEQ_CST, "agent");
// CHECK: atomicrmw udec_wrap ptr addrspace(1) %gptr, i32 %val seq_cst, align 4
diff --git a/clang/test/Driver/modules-print-library-module-manifest-path.cpp b/clang/test/Driver/modules-print-library-module-manifest-path.cpp
index 7606713..af0f124 100644
--- a/clang/test/Driver/modules-print-library-module-manifest-path.cpp
+++ b/clang/test/Driver/modules-print-library-module-manifest-path.cpp
@@ -18,6 +18,14 @@
// RUN: --target=x86_64-linux-gnu 2>&1 \
// RUN: | FileCheck libcxx.cpp
+// check that -nostdlib causes no library-provided module manifest to
+// be reported, even when libc++.modules.json is present.
+// RUN: %clang -print-library-module-manifest-path \
+// RUN: -nostdlib \
+// RUN: -resource-dir=%t/Inputs/usr/lib/x86_64-linux-gnu \
+// RUN: --target=x86_64-linux-gnu 2>&1 \
+// RUN: | FileCheck libcxx-no-module-json.cpp
+
// for macos there is a different directory structure
// where the library and libc++.modules.json file are in lib
// directly but headers are in clang/ver directory which
diff --git a/clang/test/Lexer/cxx-features.cpp b/clang/test/Lexer/cxx-features.cpp
index ced5bca..8eb9ea0 100644
--- a/clang/test/Lexer/cxx-features.cpp
+++ b/clang/test/Lexer/cxx-features.cpp
@@ -148,7 +148,7 @@
// init_captures checked below
-#if check(modules, 0, 0, 0, 0, 0, 0, 0)
+#if check(modules, 0, 0, 0, 0, 1, 1, 1)
// FIXME: 201907 in C++20
#error "wrong value for __cpp_modules"
#endif
diff --git a/clang/test/OpenMP/for_reduction_codegen.cpp b/clang/test/OpenMP/for_reduction_codegen.cpp
index 83632db..cb4bcc9 100644
--- a/clang/test/OpenMP/for_reduction_codegen.cpp
+++ b/clang/test/OpenMP/for_reduction_codegen.cpp
@@ -27,7 +27,6 @@ struct S {
~S() {}
};
-
template <typename T, int length>
T tmain() {
T t;
@@ -60,6 +59,15 @@ T tmain() {
}
extern S<float> **foo();
+int g_arr[10];
+
+void reductionArrayElement() {
+#pragma omp parallel
+#pragma omp for reduction(+:g_arr[1])
+ for (int i = 0; i < 10; i++) {
+ g_arr[1] += i;
+ }
+}
int main() {
#ifdef LAMBDA
@@ -164,6 +172,7 @@ int main() {
#pragma omp for reduction(& : var3)
for (int i = 0; i < 10; ++i)
;
+ reductionArrayElement();
return tmain<int, 42>();
#endif
}
@@ -535,6 +544,26 @@ int main() {
//.
// CHECK4: @.gomp_critical_user_.reduction.var = common global [8 x i32] zeroinitializer, align 8
//.
+
+// CHECK1-LABEL: define {{.*}}reductionArrayElement{{.*}}.omp_outlined{{.*}}
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK1: [[G_ARR:%.*]] = alloca i32, align 4
+// CHECK1: [[TMP0:%.*]] = sdiv exact i64 sub (i64 ptrtoint (ptr @g_arr to i64){{.*}}
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[G_ARR:%.*]], i64 [[TMP0]]
+// CHECK1: omp.inner.for.body:
+// CHECK1: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP1]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]],{{.+}}
+// CHECK1-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4
+// CHECK1: omp.loop.exit:
+// CHECK1-NEXT: call void {{.*}}__kmpc_for_static_fini{{.+}}
+// CHECK1: {{.*}}call i32 {{.*}}__kmpc_reduce{{.+}}
+// CHECK1: omp.reduction.default:
+// CHECK1-NEXT: call void @__kmpc_barrier{{.+}}
+// CHECK1-NEXT: ret void
+//
+
// CHECK1-LABEL: define {{[^@]+}}@main
// CHECK1-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
@@ -614,6 +643,7 @@ int main() {
// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @main.omp_outlined.11, ptr [[TMP7]])
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[VAR3]], align 8
// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @main.omp_outlined.12, ptr [[TMP8]])
+// CHECK1-NEXT: call void {{.*}}reductionArrayElement{{.*}}
// CHECK1-NEXT: [[CALL10:%.*]] = call noundef i32 @_Z5tmainIiLi42EET_v()
// CHECK1-NEXT: store i32 [[CALL10]], ptr [[RETVAL]], align 4
// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
diff --git a/clang/test/OpenMP/fuse_ast_print.cpp b/clang/test/OpenMP/fuse_ast_print.cpp
new file mode 100644
index 0000000..283f588
--- /dev/null
+++ b/clang/test/OpenMP/fuse_ast_print.cpp
@@ -0,0 +1,397 @@
+// Check no warnings/errors
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -fsyntax-only -verify %s
+// expected-no-diagnostics
+
+// Check AST and unparsing
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -ast-dump %s | FileCheck %s --check-prefix=DUMP
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -ast-print %s | FileCheck %s --check-prefix=PRINT
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -emit-pch -o %t %s
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -include-pch %t -ast-dump-all %s | FileCheck %s --check-prefix=DUMP
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -include-pch %t -ast-print %s | FileCheck %s --check-prefix=PRINT
+
+#ifndef HEADER
+#define HEADER
+
+// placeholder for loop body code
+extern "C" void body(...);
+
+// PRINT-LABEL: void foo1(
+// DUMP-LABEL: FunctionDecl {{.*}} foo1
+void foo1() {
+ // PRINT: #pragma omp fuse
+ // DUMP: OMPFuseDirective
+ #pragma omp fuse
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: for (int i = 0; i < 10; i += 2)
+ // DUMP: ForStmt
+ for (int i = 0; i < 10; i += 2)
+ // PRINT: body(i)
+ // DUMP: CallExpr
+ body(i);
+ // PRINT: for (int j = 10; j > 0; --j)
+ // DUMP: ForStmt
+ for (int j = 10; j > 0; --j)
+ // PRINT: body(j)
+ // DUMP: CallExpr
+ body(j);
+ // PRINT: for (int k = 0; k <= 10; ++k)
+ // DUMP: ForStmt
+ for (int k = 0; k <= 10; ++k)
+ // PRINT: body(k)
+ // DUMP: CallExpr
+ body(k);
+
+ }
+
+}
+
+// PRINT-LABEL: void foo2(
+// DUMP-LABEL: FunctionDecl {{.*}} foo2
+void foo2() {
+ // PRINT: #pragma omp unroll partial(4)
+ // DUMP: OMPUnrollDirective
+ // DUMP-NEXT: OMPPartialClause
+ // DUMP-NEXT: ConstantExpr
+ // DUMP-NEXT: value: Int 4
+ // DUMP-NEXT: IntegerLiteral {{.*}} 4
+ #pragma omp unroll partial(4)
+ // PRINT: #pragma omp fuse
+ // DUMP-NEXT: OMPFuseDirective
+ #pragma omp fuse
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: for (int i = 0; i < 10; i += 2)
+ // DUMP: ForStmt
+ for (int i = 0; i < 10; i += 2)
+ // PRINT: body(i)
+ // DUMP: CallExpr
+ body(i);
+ // PRINT: for (int j = 10; j > 0; --j)
+ // DUMP: ForStmt
+ for (int j = 10; j > 0; --j)
+ // PRINT: body(j)
+ // DUMP: CallExpr
+ body(j);
+ }
+
+}
+
+// PRINT-LABEL: void foo3(
+// DUMP-LABEL: FunctionTemplateDecl {{.*}} foo3
+template<int Factor1, int Factor2>
+void foo3() {
+ // PRINT: #pragma omp fuse
+ // DUMP: OMPFuseDirective
+ #pragma omp fuse
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: #pragma omp unroll partial(Factor1)
+ // DUMP: OMPUnrollDirective
+ #pragma omp unroll partial(Factor1)
+ // PRINT: for (int i = 0; i < 12; i += 1)
+ // DUMP: ForStmt
+ for (int i = 0; i < 12; i += 1)
+ // PRINT: body(i)
+ // DUMP: CallExpr
+ body(i);
+ // PRINT: #pragma omp unroll partial(Factor2)
+ // DUMP: OMPUnrollDirective
+ #pragma omp unroll partial(Factor2)
+ // PRINT: for (int k = 0; k <= 10; ++k)
+ // DUMP: ForStmt
+ for (int k = 0; k <= 10; ++k)
+ // PRINT: body(k)
+ // DUMP: CallExpr
+ body(k);
+
+ }
+}
+
+// Also test instantiating the template.
+void tfoo3() {
+ foo3<4,2>();
+}
+
+// PRINT-LABEL: void foo4(
+// DUMP-LABEL: FunctionTemplateDecl {{.*}} foo4
+template<typename T, T Step>
+void foo4(int start, int end) {
+ // PRINT: #pragma omp fuse
+ // DUMP: OMPFuseDirective
+ #pragma omp fuse
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: for (T i = start; i < end; i += Step)
+ // DUMP: ForStmt
+ for (T i = start; i < end; i += Step)
+ // PRINT: body(i)
+ // DUMP: CallExpr
+ body(i);
+
+ // PRINT: for (T j = end; j > start; j -= Step)
+ // DUMP: ForStmt
+ for (T j = end; j > start; j -= Step) {
+ // PRINT: body(j)
+ // DUMP: CallExpr
+ body(j);
+ }
+
+ }
+}
+
+// Also test instantiating the template.
+void tfoo4() {
+ foo4<int, 4>(0, 64);
+}
+
+
+
+// PRINT-LABEL: void foo5(
+// DUMP-LABEL: FunctionDecl {{.*}} foo5
+void foo5() {
+ double arr[128], arr2[128];
+ // PRINT: #pragma omp fuse
+ // DUMP: OMPFuseDirective
+ #pragma omp fuse
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT-NEXT: for (auto &&a : arr)
+ // DUMP-NEXT: CXXForRangeStmt
+ for (auto &&a: arr)
+ // PRINT: body(a)
+ // DUMP: CallExpr
+ body(a);
+ // PRINT: for (double v = 42; auto &&b : arr)
+ // DUMP: CXXForRangeStmt
+ for (double v = 42; auto &&b: arr)
+ // PRINT: body(b, v);
+ // DUMP: CallExpr
+ body(b, v);
+ // PRINT: for (auto &&c : arr2)
+ // DUMP: CXXForRangeStmt
+ for (auto &&c: arr2)
+ // PRINT: body(c)
+ // DUMP: CallExpr
+ body(c);
+
+ }
+
+}
+
+// PRINT-LABEL: void foo6(
+// DUMP-LABEL: FunctionDecl {{.*}} foo6
+void foo6() {
+ // PRINT: #pragma omp fuse
+ // DUMP: OMPFuseDirective
+ #pragma omp fuse
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: #pragma omp fuse
+ // DUMP: OMPFuseDirective
+ #pragma omp fuse
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: for (int i = 0; i <= 10; ++i)
+ // DUMP: ForStmt
+ for (int i = 0; i <= 10; ++i)
+ body(i);
+ // PRINT: for (int j = 0; j < 100; ++j)
+ // DUMP: ForStmt
+ for(int j = 0; j < 100; ++j)
+ body(j);
+ }
+ // PRINT: #pragma omp unroll partial(4)
+ // DUMP: OMPUnrollDirective
+ #pragma omp unroll partial(4)
+ // PRINT: for (int k = 0; k < 250; ++k)
+ // DUMP: ForStmt
+ for (int k = 0; k < 250; ++k)
+ body(k);
+ }
+}
+
+// PRINT-LABEL: void foo7(
+// DUMP-LABEL: FunctionDecl {{.*}} foo7
+void foo7() {
+ // PRINT: #pragma omp fuse
+ // DUMP: OMPFuseDirective
+ #pragma omp fuse
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: for (int i = 0; i < 10; i += 2)
+ // DUMP: ForStmt
+ for (int i = 0; i < 10; i += 2)
+ // PRINT: body(i)
+ // DUMP: CallExpr
+ body(i);
+ // PRINT: for (int j = 10; j > 0; --j)
+ // DUMP: ForStmt
+ for (int j = 10; j > 0; --j)
+ // PRINT: body(j)
+ // DUMP: CallExpr
+ body(j);
+ }
+ }
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: for (int k = 0; k <= 10; ++k)
+ // DUMP: ForStmt
+ for (int k = 0; k <= 10; ++k)
+ // PRINT: body(k)
+ // DUMP: CallExpr
+ body(k);
+ }
+ }
+ }
+ }
+
+}
+
+// PRINT-LABEL: void foo8(
+// DUMP-LABEL: FunctionDecl {{.*}} foo8
+void foo8() {
+ // PRINT: #pragma omp fuse looprange(2,2)
+ // DUMP: OMPFuseDirective
+ // DUMP: OMPLooprangeClause
+ #pragma omp fuse looprange(2,2)
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: for (int i = 0; i < 10; i += 2)
+ // DUMP: ForStmt
+ for (int i = 0; i < 10; i += 2)
+ // PRINT: body(i)
+ // DUMP: CallExpr
+ body(i);
+ // PRINT: for (int j = 10; j > 0; --j)
+ // DUMP: ForStmt
+ for (int j = 10; j > 0; --j)
+ // PRINT: body(j)
+ // DUMP: CallExpr
+ body(j);
+ // PRINT: for (int k = 0; k <= 10; ++k)
+ // DUMP: ForStmt
+ for (int k = 0; k <= 10; ++k)
+ // PRINT: body(k)
+ // DUMP: CallExpr
+ body(k);
+
+ }
+
+}
+
+// PRINT-LABEL: void foo9(
+// DUMP-LABEL: FunctionTemplateDecl {{.*}} foo9
+// DUMP-LABEL: NonTypeTemplateParmDecl {{.*}} F
+// DUMP-LABEL: NonTypeTemplateParmDecl {{.*}} C
+template<int F, int C>
+void foo9() {
+ // PRINT: #pragma omp fuse looprange(F,C)
+ // DUMP: OMPFuseDirective
+ // DUMP: OMPLooprangeClause
+ #pragma omp fuse looprange(F,C)
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: for (int i = 0; i < 10; i += 2)
+ // DUMP: ForStmt
+ for (int i = 0; i < 10; i += 2)
+ // PRINT: body(i)
+ // DUMP: CallExpr
+ body(i);
+ // PRINT: for (int j = 10; j > 0; --j)
+ // DUMP: ForStmt
+ for (int j = 10; j > 0; --j)
+ // PRINT: body(j)
+ // DUMP: CallExpr
+ body(j);
+
+ }
+}
+
+// Also test instantiating the template.
+void tfoo9() {
+ foo9<1, 2>();
+}
+
+// PRINT-LABEL: void foo10(
+// DUMP-LABEL: FunctionDecl {{.*}} foo10
+void foo10() {
+ // PRINT: #pragma omp fuse looprange(2,2)
+ // DUMP: OMPFuseDirective
+ // DUMP: OMPLooprangeClause
+ #pragma omp fuse looprange(2,2)
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: for (int i = 0; i < 10; i += 2)
+ // DUMP: ForStmt
+ for (int i = 0; i < 10; i += 2)
+ // PRINT: body(i)
+ // DUMP: CallExpr
+ body(i);
+ // PRINT: for (int ii = 0; ii < 10; ii += 2)
+ // DUMP: ForStmt
+ for (int ii = 0; ii < 10; ii += 2)
+ // PRINT: body(ii)
+ // DUMP: CallExpr
+ body(ii);
+ // PRINT: #pragma omp fuse looprange(2,2)
+ // DUMP: OMPFuseDirective
+ // DUMP: OMPLooprangeClause
+ #pragma omp fuse looprange(2,2)
+ {
+ // PRINT: for (int j = 10; j > 0; --j)
+ // DUMP: ForStmt
+ for (int j = 10; j > 0; --j)
+ // PRINT: body(j)
+ // DUMP: CallExpr
+ body(j);
+ // PRINT: for (int jj = 10; jj > 0; --jj)
+ // DUMP: ForStmt
+ for (int jj = 10; jj > 0; --jj)
+ // PRINT: body(jj)
+ // DUMP: CallExpr
+ body(jj);
+ // PRINT: for (int k = 0; k <= 10; ++k)
+ // DUMP: ForStmt
+ for (int k = 0; k <= 10; ++k)
+ // PRINT: body(k)
+ // DUMP: CallExpr
+ body(k);
+ // PRINT: for (int kk = 0; kk <= 10; ++kk)
+ // DUMP: ForStmt
+ for (int kk = 0; kk <= 10; ++kk)
+ // PRINT: body(kk)
+ // DUMP: CallExpr
+ body(kk);
+ }
+ }
+
+}
+
+#endif
diff --git a/clang/test/OpenMP/fuse_codegen.cpp b/clang/test/OpenMP/fuse_codegen.cpp
new file mode 100644
index 0000000..742c280
--- /dev/null
+++ b/clang/test/OpenMP/fuse_codegen.cpp
@@ -0,0 +1,2328 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 5
+// expected-no-diagnostics
+
+// Check code generation
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK2
+
+#ifndef HEADER
+#define HEADER
+
+// placeholder for loop body code.
+extern "C" void body(...) {}
+
+extern "C" void foo1(int start1, int end1, int step1, int start2, int end2, int step2) {
+ int i,j;
+ #pragma omp fuse
+ {
+ for(i = start1; i < end1; i += step1) body(i);
+ for(j = start2; j < end2; j += step2) body(j);
+ }
+
+}
+
+template <typename T>
+void foo2(T start, T end, T step){
+ T i,j,k;
+ #pragma omp fuse
+ {
+ for(i = start; i < end; i += step) body(i);
+ for(j = end; j > start; j -= step) body(j);
+ for(k = start+step; k < end+step; k += step) body(k);
+ }
+}
+
+extern "C" void tfoo2() {
+ foo2<int>(0, 64, 4);
+}
+
+extern "C" void foo3() {
+ double arr[256];
+ #pragma omp fuse
+ {
+ #pragma omp fuse
+ {
+ for(int i = 0; i < 128; ++i) body(i);
+ for(int j = 0; j < 256; j+=2) body(j);
+ }
+ for(int c = 42; auto &&v: arr) body(c,v);
+ for(int cc = 37; auto &&vv: arr) body(cc, vv);
+ }
+}
+
+extern "C" void foo4() {
+ double arr[256];
+
+ #pragma omp fuse looprange(2,2)
+ {
+ for(int i = 0; i < 128; ++i) body(i);
+ for(int j = 0; j < 256; j+=2) body(j);
+ for(int k = 0; k < 64; ++k) body(k);
+ for(int c = 42; auto &&v: arr) body(c,v);
+ }
+}
+
+// This exemplifies the use of loop transformations that generate more than
+// one top-level canonical loop nest (e.g. split, fuse with looprange, ...).
+extern "C" void foo5() {
+ double arr[256];
+ #pragma omp fuse looprange(2,2)
+ {
+ #pragma omp fuse looprange(2,2)
+ {
+ for(int i = 0; i < 128; ++i) body(i);
+ for(int j = 0; j < 256; j+=2) body(j);
+ for(int k = 0; k < 512; ++k) body(k);
+ }
+ for(int c = 42; auto &&v: arr) body(c,v);
+ for(int cc = 37; auto &&vv: arr) body(cc, vv);
+ }
+}
+
+
+#endif
+// CHECK1-LABEL: define dso_local void @body(
+// CHECK1-SAME: ...) #[[ATTR0:[0-9]+]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @foo1(
+// CHECK1-SAME: i32 noundef [[START1:%.*]], i32 noundef [[END1:%.*]], i32 noundef [[STEP1:%.*]], i32 noundef [[START2:%.*]], i32 noundef [[END2:%.*]], i32 noundef [[STEP2:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
+// CHECK1-NEXT: [[START1_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[END1_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[STEP1_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[START2_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[END2_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[STEP2_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: store i32 [[START1]], ptr [[START1_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[END1]], ptr [[END1_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[STEP1]], ptr [[STEP1_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[START2]], ptr [[START2_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[END2]], ptr [[END2_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[STEP2]], ptr [[STEP2_ADDR]], align 4
+// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[START1_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP0]], ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[START1_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[END1_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP1_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]]
+// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
+// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
+// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1
+// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP9]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[END2_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]]
+// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]]
+// CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1
+// CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1
+// CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP19]], [[TMP20]]
+// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1: [[COND_TRUE]]:
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: br label %[[COND_END:.*]]
+// CHECK1: [[COND_FALSE]]:
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: br label %[[COND_END]]
+// CHECK1: [[COND_END]]:
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], %[[COND_TRUE]] ], [ [[TMP22]], %[[COND_FALSE]] ]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND:.*]]
+// CHECK1: [[FOR_COND]]:
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP23]], [[TMP24]]
+// CHECK1-NEXT: br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1: [[FOR_BODY]]:
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]]
+// CHECK1-NEXT: br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK1: [[IF_THEN]]:
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP28]], [[TMP29]]
+// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP27]], [[MUL]]
+// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[MUL19:%.*]] = mul i32 [[TMP31]], [[TMP32]]
+// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP30]], [[MUL19]]
+// CHECK1-NEXT: store i32 [[ADD20]], ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP33]])
+// CHECK1-NEXT: br label %[[IF_END]]
+// CHECK1: [[IF_END]]:
+// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP34]], [[TMP35]]
+// CHECK1-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]]
+// CHECK1: [[IF_THEN22]]:
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL23:%.*]] = mul i32 [[TMP37]], [[TMP38]]
+// CHECK1-NEXT: [[ADD24:%.*]] = add i32 [[TMP36]], [[MUL23]]
+// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[MUL25:%.*]] = mul i32 [[TMP40]], [[TMP41]]
+// CHECK1-NEXT: [[ADD26:%.*]] = add i32 [[TMP39]], [[MUL25]]
+// CHECK1-NEXT: store i32 [[ADD26]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP42]])
+// CHECK1-NEXT: br label %[[IF_END27]]
+// CHECK1: [[IF_END27]]:
+// CHECK1-NEXT: br label %[[FOR_INC:.*]]
+// CHECK1: [[FOR_INC]]:
+// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP43]], 1
+// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK1: [[FOR_END]]:
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @tfoo2(
+// CHECK1-SAME: ) #[[ATTR0]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
+// CHECK1-NEXT: call void @_Z4foo2IiEvT_S0_S0_(i32 noundef 0, i32 noundef 64, i32 noundef 4)
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define linkonce_odr void @_Z4foo2IiEvT_S0_S0_(
+// CHECK1-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] comdat {
+// CHECK1-NEXT: [[ENTRY:.*:]]
+// CHECK1-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_17:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTNEW_STEP21:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP0]], ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]]
+// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
+// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
+// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1
+// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP9]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]]
+// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]]
+// CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1
+// CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1
+// CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
+// CHECK1-NEXT: store i32 [[ADD16]], ptr [[K]], align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK1-NEXT: [[SUB23:%.*]] = sub i32 [[TMP25]], [[TMP26]]
+// CHECK1-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP27]]
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP28]]
+// CHECK1-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1
+// CHECK1-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK1-NEXT: [[ADD28:%.*]] = add i32 [[TMP29]], 1
+// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 [[TMP30]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP31]], [[TMP32]]
+// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1: [[COND_TRUE]]:
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: br label %[[COND_END:.*]]
+// CHECK1: [[COND_FALSE]]:
+// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: br label %[[COND_END]]
+// CHECK1: [[COND_END]]:
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP33]], %[[COND_TRUE]] ], [ [[TMP34]], %[[COND_FALSE]] ]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP35]], [[TMP36]]
+// CHECK1-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]]
+// CHECK1: [[COND_TRUE30]]:
+// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK1-NEXT: br label %[[COND_END32:.*]]
+// CHECK1: [[COND_FALSE31]]:
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT: br label %[[COND_END32]]
+// CHECK1: [[COND_END32]]:
+// CHECK1-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP37]], %[[COND_TRUE30]] ], [ [[TMP38]], %[[COND_FALSE31]] ]
+// CHECK1-NEXT: store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND:.*]]
+// CHECK1: [[FOR_COND]]:
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP39]], [[TMP40]]
+// CHECK1-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1: [[FOR_BODY]]:
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP41]], [[TMP42]]
+// CHECK1-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK1: [[IF_THEN]]:
+// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP44]], [[TMP45]]
+// CHECK1-NEXT: [[ADD36:%.*]] = add i32 [[TMP43]], [[MUL]]
+// CHECK1-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[MUL37:%.*]] = mul i32 [[TMP47]], [[TMP48]]
+// CHECK1-NEXT: [[ADD38:%.*]] = add i32 [[TMP46]], [[MUL37]]
+// CHECK1-NEXT: store i32 [[ADD38]], ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP49]])
+// CHECK1-NEXT: br label %[[IF_END]]
+// CHECK1: [[IF_END]]:
+// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP50]], [[TMP51]]
+// CHECK1-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]]
+// CHECK1: [[IF_THEN40]]:
+// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL41:%.*]] = mul i32 [[TMP53]], [[TMP54]]
+// CHECK1-NEXT: [[ADD42:%.*]] = add i32 [[TMP52]], [[MUL41]]
+// CHECK1-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[MUL43:%.*]] = mul i32 [[TMP56]], [[TMP57]]
+// CHECK1-NEXT: [[SUB44:%.*]] = sub i32 [[TMP55]], [[MUL43]]
+// CHECK1-NEXT: store i32 [[SUB44]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP58]])
+// CHECK1-NEXT: br label %[[IF_END45]]
+// CHECK1: [[IF_END45]]:
+// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP59]], [[TMP60]]
+// CHECK1-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
+// CHECK1: [[IF_THEN47]]:
+// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4
+// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4
+// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL48:%.*]] = mul i32 [[TMP62]], [[TMP63]]
+// CHECK1-NEXT: [[ADD49:%.*]] = add i32 [[TMP61]], [[MUL48]]
+// CHECK1-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4
+// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4
+// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT: [[MUL50:%.*]] = mul i32 [[TMP65]], [[TMP66]]
+// CHECK1-NEXT: [[ADD51:%.*]] = add i32 [[TMP64]], [[MUL50]]
+// CHECK1-NEXT: store i32 [[ADD51]], ptr [[K]], align 4
+// CHECK1-NEXT: [[TMP67:%.*]] = load i32, ptr [[K]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP67]])
+// CHECK1-NEXT: br label %[[IF_END52]]
+// CHECK1: [[IF_END52]]:
+// CHECK1-NEXT: br label %[[FOR_INC:.*]]
+// CHECK1: [[FOR_INC]]:
+// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP68]], 1
+// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK1: [[FOR_END]]:
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @foo3(
+// CHECK1-SAME: ) #[[ATTR0]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
+// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[CC:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[__RANGE221:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__END222:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__BEGIN225:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_27:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_30:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_TEMP_140:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_FUSE_MAX46:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX52:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[VV:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[J]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1: [[COND_TRUE]]:
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: br label %[[COND_END:.*]]
+// CHECK1: [[COND_FALSE]]:
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: br label %[[COND_END]]
+// CHECK1: [[COND_END]]:
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64
+// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT: store i32 42, ptr [[C]], align 4
+// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0
+// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
+// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK1-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64
+// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
+// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
+// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1
+// CHECK1-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1
+// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1
+// CHECK1-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8
+// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1
+// CHECK1-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT: store i32 37, ptr [[CC]], align 4
+// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE221]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY23:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP15]], i64 0, i64 0
+// CHECK1-NEXT: [[ADD_PTR24:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY23]], i64 256
+// CHECK1-NEXT: store ptr [[ADD_PTR24]], ptr [[__END222]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY26:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP16]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY26]], ptr [[__BEGIN225]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY28]], ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__END222]], align 8
+// CHECK1-NEXT: store ptr [[TMP18]], ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK1-NEXT: [[SUB_PTR_LHS_CAST31:%.*]] = ptrtoint ptr [[TMP19]] to i64
+// CHECK1-NEXT: [[SUB_PTR_RHS_CAST32:%.*]] = ptrtoint ptr [[TMP20]] to i64
+// CHECK1-NEXT: [[SUB_PTR_SUB33:%.*]] = sub i64 [[SUB_PTR_LHS_CAST31]], [[SUB_PTR_RHS_CAST32]]
+// CHECK1-NEXT: [[SUB_PTR_DIV34:%.*]] = sdiv exact i64 [[SUB_PTR_SUB33]], 8
+// CHECK1-NEXT: [[SUB35:%.*]] = sub nsw i64 [[SUB_PTR_DIV34]], 1
+// CHECK1-NEXT: [[ADD36:%.*]] = add nsw i64 [[SUB35]], 1
+// CHECK1-NEXT: [[DIV37:%.*]] = sdiv i64 [[ADD36]], 1
+// CHECK1-NEXT: [[SUB38:%.*]] = sub nsw i64 [[DIV37]], 1
+// CHECK1-NEXT: store i64 [[SUB38]], ptr [[DOTCAPTURE_EXPR_30]], align 8
+// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8
+// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8
+// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_30]], align 8
+// CHECK1-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP21]], 1
+// CHECK1-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT: [[CMP41:%.*]] = icmp sgt i64 [[TMP23]], [[TMP24]]
+// CHECK1-NEXT: br i1 [[CMP41]], label %[[COND_TRUE42:.*]], label %[[COND_FALSE43:.*]]
+// CHECK1: [[COND_TRUE42]]:
+// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK1-NEXT: br label %[[COND_END44:.*]]
+// CHECK1: [[COND_FALSE43]]:
+// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT: br label %[[COND_END44]]
+// CHECK1: [[COND_END44]]:
+// CHECK1-NEXT: [[COND45:%.*]] = phi i64 [ [[TMP25]], %[[COND_TRUE42]] ], [ [[TMP26]], %[[COND_FALSE43]] ]
+// CHECK1-NEXT: store i64 [[COND45]], ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK1-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT: [[CMP47:%.*]] = icmp sgt i64 [[TMP27]], [[TMP28]]
+// CHECK1-NEXT: br i1 [[CMP47]], label %[[COND_TRUE48:.*]], label %[[COND_FALSE49:.*]]
+// CHECK1: [[COND_TRUE48]]:
+// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK1-NEXT: br label %[[COND_END50:.*]]
+// CHECK1: [[COND_FALSE49]]:
+// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT: br label %[[COND_END50]]
+// CHECK1: [[COND_END50]]:
+// CHECK1-NEXT: [[COND51:%.*]] = phi i64 [ [[TMP29]], %[[COND_TRUE48]] ], [ [[TMP30]], %[[COND_FALSE49]] ]
+// CHECK1-NEXT: store i64 [[COND51]], ptr [[DOTOMP_FUSE_MAX46]], align 8
+// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND:.*]]
+// CHECK1: [[FOR_COND]]:
+// CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX46]], align 8
+// CHECK1-NEXT: [[CMP53:%.*]] = icmp slt i64 [[TMP31]], [[TMP32]]
+// CHECK1-NEXT: br i1 [[CMP53]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1: [[FOR_BODY]]:
+// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP33]], [[TMP34]]
+// CHECK1-NEXT: br i1 [[CMP54]], label %[[IF_THEN:.*]], label %[[IF_END74:.*]]
+// CHECK1: [[IF_THEN]]:
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4
+// CHECK1-NEXT: [[CONV55:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4
+// CHECK1-NEXT: [[CONV56:%.*]] = sext i32 [[TMP36]] to i64
+// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV56]], [[TMP37]]
+// CHECK1-NEXT: [[ADD57:%.*]] = add nsw i64 [[CONV55]], [[MUL]]
+// CHECK1-NEXT: [[CONV58:%.*]] = trunc i64 [[ADD57]] to i32
+// CHECK1-NEXT: store i32 [[CONV58]], ptr [[DOTOMP_IV06]], align 4
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4
+// CHECK1-NEXT: [[MUL59:%.*]] = mul nsw i32 [[TMP38]], 1
+// CHECK1-NEXT: [[ADD60:%.*]] = add nsw i32 0, [[MUL59]]
+// CHECK1-NEXT: store i32 [[ADD60]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[CMP61:%.*]] = icmp slt i32 [[TMP39]], [[TMP40]]
+// CHECK1-NEXT: br i1 [[CMP61]], label %[[IF_THEN62:.*]], label %[[IF_END:.*]]
+// CHECK1: [[IF_THEN62]]:
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL63:%.*]] = mul nsw i32 [[TMP42]], [[TMP43]]
+// CHECK1-NEXT: [[ADD64:%.*]] = add nsw i32 [[TMP41]], [[MUL63]]
+// CHECK1-NEXT: store i32 [[ADD64]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP44]], 1
+// CHECK1-NEXT: [[ADD66:%.*]] = add nsw i32 0, [[MUL65]]
+// CHECK1-NEXT: store i32 [[ADD66]], ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP45]])
+// CHECK1-NEXT: br label %[[IF_END]]
+// CHECK1: [[IF_END]]:
+// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP67:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]]
+// CHECK1-NEXT: br i1 [[CMP67]], label %[[IF_THEN68:.*]], label %[[IF_END73:.*]]
+// CHECK1: [[IF_THEN68]]:
+// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL69:%.*]] = mul nsw i32 [[TMP49]], [[TMP50]]
+// CHECK1-NEXT: [[ADD70:%.*]] = add nsw i32 [[TMP48]], [[MUL69]]
+// CHECK1-NEXT: store i32 [[ADD70]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP51]], 2
+// CHECK1-NEXT: [[ADD72:%.*]] = add nsw i32 0, [[MUL71]]
+// CHECK1-NEXT: store i32 [[ADD72]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP52]])
+// CHECK1-NEXT: br label %[[IF_END73]]
+// CHECK1: [[IF_END73]]:
+// CHECK1-NEXT: br label %[[IF_END74]]
+// CHECK1: [[IF_END74]]:
+// CHECK1-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT: [[CMP75:%.*]] = icmp slt i64 [[TMP53]], [[TMP54]]
+// CHECK1-NEXT: br i1 [[CMP75]], label %[[IF_THEN76:.*]], label %[[IF_END81:.*]]
+// CHECK1: [[IF_THEN76]]:
+// CHECK1-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8
+// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8
+// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[MUL77:%.*]] = mul nsw i64 [[TMP56]], [[TMP57]]
+// CHECK1-NEXT: [[ADD78:%.*]] = add nsw i64 [[TMP55]], [[MUL77]]
+// CHECK1-NEXT: store i64 [[ADD78]], ptr [[DOTOMP_IV120]], align 8
+// CHECK1-NEXT: [[TMP58:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8
+// CHECK1-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], 1
+// CHECK1-NEXT: [[ADD_PTR80:%.*]] = getelementptr inbounds double, ptr [[TMP58]], i64 [[MUL79]]
+// CHECK1-NEXT: store ptr [[ADD_PTR80]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: store ptr [[TMP60]], ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[C]], align 4
+// CHECK1-NEXT: [[TMP62:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP63:%.*]] = load double, ptr [[TMP62]], align 8
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP61]], double noundef [[TMP63]])
+// CHECK1-NEXT: br label %[[IF_END81]]
+// CHECK1: [[IF_END81]]:
+// CHECK1-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT: [[CMP82:%.*]] = icmp slt i64 [[TMP64]], [[TMP65]]
+// CHECK1-NEXT: br i1 [[CMP82]], label %[[IF_THEN83:.*]], label %[[IF_END88:.*]]
+// CHECK1: [[IF_THEN83]]:
+// CHECK1-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8
+// CHECK1-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8
+// CHECK1-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[MUL84:%.*]] = mul nsw i64 [[TMP67]], [[TMP68]]
+// CHECK1-NEXT: [[ADD85:%.*]] = add nsw i64 [[TMP66]], [[MUL84]]
+// CHECK1-NEXT: store i64 [[ADD85]], ptr [[DOTOMP_IV2]], align 8
+// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK1-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8
+// CHECK1-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], 1
+// CHECK1-NEXT: [[ADD_PTR87:%.*]] = getelementptr inbounds double, ptr [[TMP69]], i64 [[MUL86]]
+// CHECK1-NEXT: store ptr [[ADD_PTR87]], ptr [[__BEGIN225]], align 8
+// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[__BEGIN225]], align 8
+// CHECK1-NEXT: store ptr [[TMP71]], ptr [[VV]], align 8
+// CHECK1-NEXT: [[TMP72:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK1-NEXT: [[TMP74:%.*]] = load double, ptr [[TMP73]], align 8
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP72]], double noundef [[TMP74]])
+// CHECK1-NEXT: br label %[[IF_END88]]
+// CHECK1: [[IF_END88]]:
+// CHECK1-NEXT: br label %[[FOR_INC:.*]]
+// CHECK1: [[FOR_INC]]:
+// CHECK1-NEXT: [[TMP75:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[INC:%.*]] = add nsw i64 [[TMP75]], 1
+// CHECK1-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK1: [[FOR_END]]:
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @foo4(
+// CHECK1-SAME: ) #[[ATTR0]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
+// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: store i32 0, ptr [[J]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[K]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1: [[COND_TRUE]]:
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: br label %[[COND_END:.*]]
+// CHECK1: [[COND_FALSE]]:
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: br label %[[COND_END]]
+// CHECK1: [[COND_END]]:
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND:.*]]
+// CHECK1: [[FOR_COND]]:
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 128
+// CHECK1-NEXT: br i1 [[CMP1]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1: [[FOR_BODY]]:
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP6]])
+// CHECK1-NEXT: br label %[[FOR_INC:.*]]
+// CHECK1: [[FOR_INC]]:
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK1-NEXT: store i32 [[INC]], ptr [[I]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
+// CHECK1: [[FOR_END]]:
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND2:.*]]
+// CHECK1: [[FOR_COND2]]:
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]]
+// CHECK1-NEXT: br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END17:.*]]
+// CHECK1: [[FOR_BODY4]]:
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]]
+// CHECK1-NEXT: br i1 [[CMP5]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK1: [[IF_THEN]]:
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], [[TMP14]]
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[MUL]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 2
+// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
+// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP16]])
+// CHECK1-NEXT: br label %[[IF_END]]
+// CHECK1: [[IF_END]]:
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]]
+// CHECK1-NEXT: br i1 [[CMP8]], label %[[IF_THEN9:.*]], label %[[IF_END14:.*]]
+// CHECK1: [[IF_THEN9]]:
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]]
+// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[MUL10]]
+// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP22]], 1
+// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
+// CHECK1-NEXT: store i32 [[ADD13]], ptr [[K]], align 4
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[K]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP23]])
+// CHECK1-NEXT: br label %[[IF_END14]]
+// CHECK1: [[IF_END14]]:
+// CHECK1-NEXT: br label %[[FOR_INC15:.*]]
+// CHECK1: [[FOR_INC15]]:
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[INC16:%.*]] = add nsw i32 [[TMP24]], 1
+// CHECK1-NEXT: store i32 [[INC16]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP8:![0-9]+]]
+// CHECK1: [[FOR_END17]]:
+// CHECK1-NEXT: store i32 42, ptr [[C]], align 4
+// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP25]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY18:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP26]], i64 0, i64 0
+// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY18]], i64 256
+// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND19:.*]]
+// CHECK1: [[FOR_COND19]]:
+// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK1-NEXT: [[CMP20:%.*]] = icmp ne ptr [[TMP27]], [[TMP28]]
+// CHECK1-NEXT: br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END23:.*]]
+// CHECK1: [[FOR_BODY21]]:
+// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: store ptr [[TMP29]], ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[C]], align 4
+// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP32:%.*]] = load double, ptr [[TMP31]], align 8
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP30]], double noundef [[TMP32]])
+// CHECK1-NEXT: br label %[[FOR_INC22:.*]]
+// CHECK1: [[FOR_INC22]]:
+// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP33]], i32 1
+// CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND19]]
+// CHECK1: [[FOR_END23]]:
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @foo5(
+// CHECK1-SAME: ) #[[ATTR0]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
+// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_TEMP_121:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_FUSE_MAX22:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX29:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[CC:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[__RANGE264:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__BEGIN265:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__END267:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[VV:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: store i32 0, ptr [[J]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[K]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: store i32 512, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1: [[COND_TRUE]]:
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: br label %[[COND_END:.*]]
+// CHECK1: [[COND_FALSE]]:
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: br label %[[COND_END]]
+// CHECK1: [[COND_END]]:
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64
+// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT: store i32 42, ptr [[C]], align 4
+// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0
+// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
+// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK1-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64
+// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
+// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
+// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1
+// CHECK1-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1
+// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1
+// CHECK1-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8
+// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1
+// CHECK1-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT: [[CMP23:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]]
+// CHECK1-NEXT: br i1 [[CMP23]], label %[[COND_TRUE24:.*]], label %[[COND_FALSE25:.*]]
+// CHECK1: [[COND_TRUE24]]:
+// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK1-NEXT: br label %[[COND_END26:.*]]
+// CHECK1: [[COND_FALSE25]]:
+// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT: br label %[[COND_END26]]
+// CHECK1: [[COND_END26]]:
+// CHECK1-NEXT: [[COND27:%.*]] = phi i64 [ [[TMP18]], %[[COND_TRUE24]] ], [ [[TMP19]], %[[COND_FALSE25]] ]
+// CHECK1-NEXT: store i64 [[COND27]], ptr [[DOTOMP_FUSE_MAX22]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND:.*]]
+// CHECK1: [[FOR_COND]]:
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[CMP28:%.*]] = icmp slt i32 [[TMP20]], 128
+// CHECK1-NEXT: br i1 [[CMP28]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1: [[FOR_BODY]]:
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP21]])
+// CHECK1-NEXT: br label %[[FOR_INC:.*]]
+// CHECK1: [[FOR_INC]]:
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1
+// CHECK1-NEXT: store i32 [[INC]], ptr [[I]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK1: [[FOR_END]]:
+// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND30:.*]]
+// CHECK1: [[FOR_COND30]]:
+// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX22]], align 8
+// CHECK1-NEXT: [[CMP31:%.*]] = icmp slt i64 [[TMP23]], [[TMP24]]
+// CHECK1-NEXT: br i1 [[CMP31]], label %[[FOR_BODY32:.*]], label %[[FOR_END63:.*]]
+// CHECK1: [[FOR_BODY32]]:
+// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT: [[CMP33:%.*]] = icmp slt i64 [[TMP25]], [[TMP26]]
+// CHECK1-NEXT: br i1 [[CMP33]], label %[[IF_THEN:.*]], label %[[IF_END53:.*]]
+// CHECK1: [[IF_THEN]]:
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4
+// CHECK1-NEXT: [[CONV34:%.*]] = sext i32 [[TMP27]] to i64
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4
+// CHECK1-NEXT: [[CONV35:%.*]] = sext i32 [[TMP28]] to i64
+// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV35]], [[TMP29]]
+// CHECK1-NEXT: [[ADD36:%.*]] = add nsw i64 [[CONV34]], [[MUL]]
+// CHECK1-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32
+// CHECK1-NEXT: store i32 [[CONV37]], ptr [[DOTOMP_IV06]], align 4
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4
+// CHECK1-NEXT: [[MUL38:%.*]] = mul nsw i32 [[TMP30]], 1
+// CHECK1-NEXT: [[ADD39:%.*]] = add nsw i32 0, [[MUL38]]
+// CHECK1-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[CMP40:%.*]] = icmp slt i32 [[TMP31]], [[TMP32]]
+// CHECK1-NEXT: br i1 [[CMP40]], label %[[IF_THEN41:.*]], label %[[IF_END:.*]]
+// CHECK1: [[IF_THEN41]]:
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL42:%.*]] = mul nsw i32 [[TMP34]], [[TMP35]]
+// CHECK1-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP33]], [[MUL42]]
+// CHECK1-NEXT: store i32 [[ADD43]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[MUL44:%.*]] = mul nsw i32 [[TMP36]], 2
+// CHECK1-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]]
+// CHECK1-NEXT: store i32 [[ADD45]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP37]])
+// CHECK1-NEXT: br label %[[IF_END]]
+// CHECK1: [[IF_END]]:
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP46:%.*]] = icmp slt i32 [[TMP38]], [[TMP39]]
+// CHECK1-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
+// CHECK1: [[IF_THEN47]]:
+// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL48:%.*]] = mul nsw i32 [[TMP41]], [[TMP42]]
+// CHECK1-NEXT: [[ADD49:%.*]] = add nsw i32 [[TMP40]], [[MUL48]]
+// CHECK1-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[MUL50:%.*]] = mul nsw i32 [[TMP43]], 1
+// CHECK1-NEXT: [[ADD51:%.*]] = add nsw i32 0, [[MUL50]]
+// CHECK1-NEXT: store i32 [[ADD51]], ptr [[K]], align 4
+// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[K]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP44]])
+// CHECK1-NEXT: br label %[[IF_END52]]
+// CHECK1: [[IF_END52]]:
+// CHECK1-NEXT: br label %[[IF_END53]]
+// CHECK1: [[IF_END53]]:
+// CHECK1-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP45]], [[TMP46]]
+// CHECK1-NEXT: br i1 [[CMP54]], label %[[IF_THEN55:.*]], label %[[IF_END60:.*]]
+// CHECK1: [[IF_THEN55]]:
+// CHECK1-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8
+// CHECK1-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8
+// CHECK1-NEXT: [[TMP49:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: [[MUL56:%.*]] = mul nsw i64 [[TMP48]], [[TMP49]]
+// CHECK1-NEXT: [[ADD57:%.*]] = add nsw i64 [[TMP47]], [[MUL56]]
+// CHECK1-NEXT: store i64 [[ADD57]], ptr [[DOTOMP_IV120]], align 8
+// CHECK1-NEXT: [[TMP50:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT: [[TMP51:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8
+// CHECK1-NEXT: [[MUL58:%.*]] = mul nsw i64 [[TMP51]], 1
+// CHECK1-NEXT: [[ADD_PTR59:%.*]] = getelementptr inbounds double, ptr [[TMP50]], i64 [[MUL58]]
+// CHECK1-NEXT: store ptr [[ADD_PTR59]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[TMP52:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: store ptr [[TMP52]], ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[C]], align 4
+// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP55:%.*]] = load double, ptr [[TMP54]], align 8
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP53]], double noundef [[TMP55]])
+// CHECK1-NEXT: br label %[[IF_END60]]
+// CHECK1: [[IF_END60]]:
+// CHECK1-NEXT: br label %[[FOR_INC61:.*]]
+// CHECK1: [[FOR_INC61]]:
+// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: [[INC62:%.*]] = add nsw i64 [[TMP56]], 1
+// CHECK1-NEXT: store i64 [[INC62]], ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND30]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK1: [[FOR_END63]]:
+// CHECK1-NEXT: store i32 37, ptr [[CC]], align 4
+// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE264]], align 8
+// CHECK1-NEXT: [[TMP57:%.*]] = load ptr, ptr [[__RANGE264]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY66:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP57]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY66]], ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT: [[TMP58:%.*]] = load ptr, ptr [[__RANGE264]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY68:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP58]], i64 0, i64 0
+// CHECK1-NEXT: [[ADD_PTR69:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY68]], i64 256
+// CHECK1-NEXT: store ptr [[ADD_PTR69]], ptr [[__END267]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND70:.*]]
+// CHECK1: [[FOR_COND70]]:
+// CHECK1-NEXT: [[TMP59:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__END267]], align 8
+// CHECK1-NEXT: [[CMP71:%.*]] = icmp ne ptr [[TMP59]], [[TMP60]]
+// CHECK1-NEXT: br i1 [[CMP71]], label %[[FOR_BODY72:.*]], label %[[FOR_END74:.*]]
+// CHECK1: [[FOR_BODY72]]:
+// CHECK1-NEXT: [[TMP61:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT: store ptr [[TMP61]], ptr [[VV]], align 8
+// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK1-NEXT: [[TMP64:%.*]] = load double, ptr [[TMP63]], align 8
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP62]], double noundef [[TMP64]])
+// CHECK1-NEXT: br label %[[FOR_INC73:.*]]
+// CHECK1: [[FOR_INC73]]:
+// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP65]], i32 1
+// CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND70]]
+// CHECK1: [[FOR_END74]]:
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @body(
+// CHECK2-SAME: ...) #[[ATTR0:[0-9]+]] {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @foo1(
+// CHECK2-SAME: i32 noundef [[START1:%.*]], i32 noundef [[END1:%.*]], i32 noundef [[STEP1:%.*]], i32 noundef [[START2:%.*]], i32 noundef [[END2:%.*]], i32 noundef [[STEP2:%.*]]) #[[ATTR0]] {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: [[START1_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[END1_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[STEP1_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[START2_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[END2_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[STEP2_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: store i32 [[START1]], ptr [[START1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[END1]], ptr [[END1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[STEP1]], ptr [[STEP1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[START2]], ptr [[START2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[END2]], ptr [[END2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[STEP2]], ptr [[STEP2_ADDR]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[START1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP0]], ptr [[I]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[START1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[END1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]]
+// CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]]
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
+// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
+// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1
+// CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP9]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[END2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]]
+// CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]]
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]]
+// CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1
+// CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1
+// CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP19]], [[TMP20]]
+// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2: [[COND_TRUE]]:
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: br label %[[COND_END:.*]]
+// CHECK2: [[COND_FALSE]]:
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: br label %[[COND_END]]
+// CHECK2: [[COND_END]]:
+// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], %[[COND_TRUE]] ], [ [[TMP22]], %[[COND_FALSE]] ]
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND:.*]]
+// CHECK2: [[FOR_COND]]:
+// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP23]], [[TMP24]]
+// CHECK2-NEXT: br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2: [[FOR_BODY]]:
+// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]]
+// CHECK2-NEXT: br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK2: [[IF_THEN]]:
+// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP28]], [[TMP29]]
+// CHECK2-NEXT: [[ADD18:%.*]] = add i32 [[TMP27]], [[MUL]]
+// CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[MUL19:%.*]] = mul i32 [[TMP31]], [[TMP32]]
+// CHECK2-NEXT: [[ADD20:%.*]] = add i32 [[TMP30]], [[MUL19]]
+// CHECK2-NEXT: store i32 [[ADD20]], ptr [[I]], align 4
+// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP33]])
+// CHECK2-NEXT: br label %[[IF_END]]
+// CHECK2: [[IF_END]]:
+// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP34]], [[TMP35]]
+// CHECK2-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]]
+// CHECK2: [[IF_THEN22]]:
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL23:%.*]] = mul i32 [[TMP37]], [[TMP38]]
+// CHECK2-NEXT: [[ADD24:%.*]] = add i32 [[TMP36]], [[MUL23]]
+// CHECK2-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[MUL25:%.*]] = mul i32 [[TMP40]], [[TMP41]]
+// CHECK2-NEXT: [[ADD26:%.*]] = add i32 [[TMP39]], [[MUL25]]
+// CHECK2-NEXT: store i32 [[ADD26]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP42]])
+// CHECK2-NEXT: br label %[[IF_END27]]
+// CHECK2: [[IF_END27]]:
+// CHECK2-NEXT: br label %[[FOR_INC:.*]]
+// CHECK2: [[FOR_INC]]:
+// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP43]], 1
+// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK2: [[FOR_END]]:
+// CHECK2-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @foo3(
+// CHECK2-SAME: ) #[[ATTR0]] {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[CC:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[__RANGE221:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__END222:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__BEGIN225:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_27:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_30:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_TEMP_140:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX46:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX52:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[VV:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[J]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2: [[COND_TRUE]]:
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: br label %[[COND_END:.*]]
+// CHECK2: [[COND_FALSE]]:
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: br label %[[COND_END]]
+// CHECK2: [[COND_END]]:
+// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64
+// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT: store i32 42, ptr [[C]], align 4
+// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0
+// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
+// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK2-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64
+// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
+// CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
+// CHECK2-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1
+// CHECK2-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1
+// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1
+// CHECK2-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8
+// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8
+// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1
+// CHECK2-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: store i32 37, ptr [[CC]], align 4
+// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE221]], align 8
+// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY23:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP15]], i64 0, i64 0
+// CHECK2-NEXT: [[ADD_PTR24:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY23]], i64 256
+// CHECK2-NEXT: store ptr [[ADD_PTR24]], ptr [[__END222]], align 8
+// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY26:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP16]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY26]], ptr [[__BEGIN225]], align 8
+// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY28]], ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__END222]], align 8
+// CHECK2-NEXT: store ptr [[TMP18]], ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK2-NEXT: [[SUB_PTR_LHS_CAST31:%.*]] = ptrtoint ptr [[TMP19]] to i64
+// CHECK2-NEXT: [[SUB_PTR_RHS_CAST32:%.*]] = ptrtoint ptr [[TMP20]] to i64
+// CHECK2-NEXT: [[SUB_PTR_SUB33:%.*]] = sub i64 [[SUB_PTR_LHS_CAST31]], [[SUB_PTR_RHS_CAST32]]
+// CHECK2-NEXT: [[SUB_PTR_DIV34:%.*]] = sdiv exact i64 [[SUB_PTR_SUB33]], 8
+// CHECK2-NEXT: [[SUB35:%.*]] = sub nsw i64 [[SUB_PTR_DIV34]], 1
+// CHECK2-NEXT: [[ADD36:%.*]] = add nsw i64 [[SUB35]], 1
+// CHECK2-NEXT: [[DIV37:%.*]] = sdiv i64 [[ADD36]], 1
+// CHECK2-NEXT: [[SUB38:%.*]] = sub nsw i64 [[DIV37]], 1
+// CHECK2-NEXT: store i64 [[SUB38]], ptr [[DOTCAPTURE_EXPR_30]], align 8
+// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8
+// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8
+// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_30]], align 8
+// CHECK2-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP21]], 1
+// CHECK2-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: [[CMP41:%.*]] = icmp sgt i64 [[TMP23]], [[TMP24]]
+// CHECK2-NEXT: br i1 [[CMP41]], label %[[COND_TRUE42:.*]], label %[[COND_FALSE43:.*]]
+// CHECK2: [[COND_TRUE42]]:
+// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK2-NEXT: br label %[[COND_END44:.*]]
+// CHECK2: [[COND_FALSE43]]:
+// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: br label %[[COND_END44]]
+// CHECK2: [[COND_END44]]:
+// CHECK2-NEXT: [[COND45:%.*]] = phi i64 [ [[TMP25]], %[[COND_TRUE42]] ], [ [[TMP26]], %[[COND_FALSE43]] ]
+// CHECK2-NEXT: store i64 [[COND45]], ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK2-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT: [[CMP47:%.*]] = icmp sgt i64 [[TMP27]], [[TMP28]]
+// CHECK2-NEXT: br i1 [[CMP47]], label %[[COND_TRUE48:.*]], label %[[COND_FALSE49:.*]]
+// CHECK2: [[COND_TRUE48]]:
+// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK2-NEXT: br label %[[COND_END50:.*]]
+// CHECK2: [[COND_FALSE49]]:
+// CHECK2-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT: br label %[[COND_END50]]
+// CHECK2: [[COND_END50]]:
+// CHECK2-NEXT: [[COND51:%.*]] = phi i64 [ [[TMP29]], %[[COND_TRUE48]] ], [ [[TMP30]], %[[COND_FALSE49]] ]
+// CHECK2-NEXT: store i64 [[COND51]], ptr [[DOTOMP_FUSE_MAX46]], align 8
+// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND:.*]]
+// CHECK2: [[FOR_COND]]:
+// CHECK2-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX46]], align 8
+// CHECK2-NEXT: [[CMP53:%.*]] = icmp slt i64 [[TMP31]], [[TMP32]]
+// CHECK2-NEXT: br i1 [[CMP53]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2: [[FOR_BODY]]:
+// CHECK2-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP33]], [[TMP34]]
+// CHECK2-NEXT: br i1 [[CMP54]], label %[[IF_THEN:.*]], label %[[IF_END74:.*]]
+// CHECK2: [[IF_THEN]]:
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4
+// CHECK2-NEXT: [[CONV55:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4
+// CHECK2-NEXT: [[CONV56:%.*]] = sext i32 [[TMP36]] to i64
+// CHECK2-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV56]], [[TMP37]]
+// CHECK2-NEXT: [[ADD57:%.*]] = add nsw i64 [[CONV55]], [[MUL]]
+// CHECK2-NEXT: [[CONV58:%.*]] = trunc i64 [[ADD57]] to i32
+// CHECK2-NEXT: store i32 [[CONV58]], ptr [[DOTOMP_IV06]], align 4
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4
+// CHECK2-NEXT: [[MUL59:%.*]] = mul nsw i32 [[TMP38]], 1
+// CHECK2-NEXT: [[ADD60:%.*]] = add nsw i32 0, [[MUL59]]
+// CHECK2-NEXT: store i32 [[ADD60]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[CMP61:%.*]] = icmp slt i32 [[TMP39]], [[TMP40]]
+// CHECK2-NEXT: br i1 [[CMP61]], label %[[IF_THEN62:.*]], label %[[IF_END:.*]]
+// CHECK2: [[IF_THEN62]]:
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL63:%.*]] = mul nsw i32 [[TMP42]], [[TMP43]]
+// CHECK2-NEXT: [[ADD64:%.*]] = add nsw i32 [[TMP41]], [[MUL63]]
+// CHECK2-NEXT: store i32 [[ADD64]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP44]], 1
+// CHECK2-NEXT: [[ADD66:%.*]] = add nsw i32 0, [[MUL65]]
+// CHECK2-NEXT: store i32 [[ADD66]], ptr [[I]], align 4
+// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP45]])
+// CHECK2-NEXT: br label %[[IF_END]]
+// CHECK2: [[IF_END]]:
+// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP67:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]]
+// CHECK2-NEXT: br i1 [[CMP67]], label %[[IF_THEN68:.*]], label %[[IF_END73:.*]]
+// CHECK2: [[IF_THEN68]]:
+// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL69:%.*]] = mul nsw i32 [[TMP49]], [[TMP50]]
+// CHECK2-NEXT: [[ADD70:%.*]] = add nsw i32 [[TMP48]], [[MUL69]]
+// CHECK2-NEXT: store i32 [[ADD70]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP51]], 2
+// CHECK2-NEXT: [[ADD72:%.*]] = add nsw i32 0, [[MUL71]]
+// CHECK2-NEXT: store i32 [[ADD72]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP52]])
+// CHECK2-NEXT: br label %[[IF_END73]]
+// CHECK2: [[IF_END73]]:
+// CHECK2-NEXT: br label %[[IF_END74]]
+// CHECK2: [[IF_END74]]:
+// CHECK2-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: [[CMP75:%.*]] = icmp slt i64 [[TMP53]], [[TMP54]]
+// CHECK2-NEXT: br i1 [[CMP75]], label %[[IF_THEN76:.*]], label %[[IF_END81:.*]]
+// CHECK2: [[IF_THEN76]]:
+// CHECK2-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8
+// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8
+// CHECK2-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[MUL77:%.*]] = mul nsw i64 [[TMP56]], [[TMP57]]
+// CHECK2-NEXT: [[ADD78:%.*]] = add nsw i64 [[TMP55]], [[MUL77]]
+// CHECK2-NEXT: store i64 [[ADD78]], ptr [[DOTOMP_IV120]], align 8
+// CHECK2-NEXT: [[TMP58:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8
+// CHECK2-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], 1
+// CHECK2-NEXT: [[ADD_PTR80:%.*]] = getelementptr inbounds double, ptr [[TMP58]], i64 [[MUL79]]
+// CHECK2-NEXT: store ptr [[ADD_PTR80]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: store ptr [[TMP60]], ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[C]], align 4
+// CHECK2-NEXT: [[TMP62:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP63:%.*]] = load double, ptr [[TMP62]], align 8
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP61]], double noundef [[TMP63]])
+// CHECK2-NEXT: br label %[[IF_END81]]
+// CHECK2: [[IF_END81]]:
+// CHECK2-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT: [[CMP82:%.*]] = icmp slt i64 [[TMP64]], [[TMP65]]
+// CHECK2-NEXT: br i1 [[CMP82]], label %[[IF_THEN83:.*]], label %[[IF_END88:.*]]
+// CHECK2: [[IF_THEN83]]:
+// CHECK2-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8
+// CHECK2-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8
+// CHECK2-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[MUL84:%.*]] = mul nsw i64 [[TMP67]], [[TMP68]]
+// CHECK2-NEXT: [[ADD85:%.*]] = add nsw i64 [[TMP66]], [[MUL84]]
+// CHECK2-NEXT: store i64 [[ADD85]], ptr [[DOTOMP_IV2]], align 8
+// CHECK2-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK2-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8
+// CHECK2-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], 1
+// CHECK2-NEXT: [[ADD_PTR87:%.*]] = getelementptr inbounds double, ptr [[TMP69]], i64 [[MUL86]]
+// CHECK2-NEXT: store ptr [[ADD_PTR87]], ptr [[__BEGIN225]], align 8
+// CHECK2-NEXT: [[TMP71:%.*]] = load ptr, ptr [[__BEGIN225]], align 8
+// CHECK2-NEXT: store ptr [[TMP71]], ptr [[VV]], align 8
+// CHECK2-NEXT: [[TMP72:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK2-NEXT: [[TMP73:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK2-NEXT: [[TMP74:%.*]] = load double, ptr [[TMP73]], align 8
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP72]], double noundef [[TMP74]])
+// CHECK2-NEXT: br label %[[IF_END88]]
+// CHECK2: [[IF_END88]]:
+// CHECK2-NEXT: br label %[[FOR_INC:.*]]
+// CHECK2: [[FOR_INC]]:
+// CHECK2-NEXT: [[TMP75:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[INC:%.*]] = add nsw i64 [[TMP75]], 1
+// CHECK2-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK2: [[FOR_END]]:
+// CHECK2-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @foo4(
+// CHECK2-SAME: ) #[[ATTR0]] {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: store i32 0, ptr [[J]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[K]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2: [[COND_TRUE]]:
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: br label %[[COND_END:.*]]
+// CHECK2: [[COND_FALSE]]:
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: br label %[[COND_END]]
+// CHECK2: [[COND_END]]:
+// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND:.*]]
+// CHECK2: [[FOR_COND]]:
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 128
+// CHECK2-NEXT: br i1 [[CMP1]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2: [[FOR_BODY]]:
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP6]])
+// CHECK2-NEXT: br label %[[FOR_INC:.*]]
+// CHECK2: [[FOR_INC]]:
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK2-NEXT: store i32 [[INC]], ptr [[I]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK2: [[FOR_END]]:
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND2:.*]]
+// CHECK2: [[FOR_COND2]]:
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]]
+// CHECK2-NEXT: br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END17:.*]]
+// CHECK2: [[FOR_BODY4]]:
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]]
+// CHECK2-NEXT: br i1 [[CMP5]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK2: [[IF_THEN]]:
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], [[TMP14]]
+// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[MUL]]
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 2
+// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
+// CHECK2-NEXT: store i32 [[ADD7]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP16]])
+// CHECK2-NEXT: br label %[[IF_END]]
+// CHECK2: [[IF_END]]:
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]]
+// CHECK2-NEXT: br i1 [[CMP8]], label %[[IF_THEN9:.*]], label %[[IF_END14:.*]]
+// CHECK2: [[IF_THEN9]]:
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]]
+// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[MUL10]]
+// CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP22]], 1
+// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
+// CHECK2-NEXT: store i32 [[ADD13]], ptr [[K]], align 4
+// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[K]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP23]])
+// CHECK2-NEXT: br label %[[IF_END14]]
+// CHECK2: [[IF_END14]]:
+// CHECK2-NEXT: br label %[[FOR_INC15:.*]]
+// CHECK2: [[FOR_INC15]]:
+// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[INC16:%.*]] = add nsw i32 [[TMP24]], 1
+// CHECK2-NEXT: store i32 [[INC16]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP7:![0-9]+]]
+// CHECK2: [[FOR_END17]]:
+// CHECK2-NEXT: store i32 42, ptr [[C]], align 4
+// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP25]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY18:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP26]], i64 0, i64 0
+// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY18]], i64 256
+// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND19:.*]]
+// CHECK2: [[FOR_COND19]]:
+// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[TMP28:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK2-NEXT: [[CMP20:%.*]] = icmp ne ptr [[TMP27]], [[TMP28]]
+// CHECK2-NEXT: br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END23:.*]]
+// CHECK2: [[FOR_BODY21]]:
+// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: store ptr [[TMP29]], ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[C]], align 4
+// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP32:%.*]] = load double, ptr [[TMP31]], align 8
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP30]], double noundef [[TMP32]])
+// CHECK2-NEXT: br label %[[FOR_INC22:.*]]
+// CHECK2: [[FOR_INC22]]:
+// CHECK2-NEXT: [[TMP33:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP33]], i32 1
+// CHECK2-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND19]]
+// CHECK2: [[FOR_END23]]:
+// CHECK2-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @foo5(
+// CHECK2-SAME: ) #[[ATTR0]] {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_TEMP_121:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX22:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX29:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[CC:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[__RANGE264:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__BEGIN265:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__END267:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[VV:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: store i32 0, ptr [[J]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[K]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: store i32 512, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2: [[COND_TRUE]]:
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: br label %[[COND_END:.*]]
+// CHECK2: [[COND_FALSE]]:
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: br label %[[COND_END]]
+// CHECK2: [[COND_END]]:
+// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64
+// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT: store i32 42, ptr [[C]], align 4
+// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0
+// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
+// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK2-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64
+// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
+// CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
+// CHECK2-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1
+// CHECK2-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1
+// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1
+// CHECK2-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8
+// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8
+// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1
+// CHECK2-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: [[CMP23:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]]
+// CHECK2-NEXT: br i1 [[CMP23]], label %[[COND_TRUE24:.*]], label %[[COND_FALSE25:.*]]
+// CHECK2: [[COND_TRUE24]]:
+// CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK2-NEXT: br label %[[COND_END26:.*]]
+// CHECK2: [[COND_FALSE25]]:
+// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: br label %[[COND_END26]]
+// CHECK2: [[COND_END26]]:
+// CHECK2-NEXT: [[COND27:%.*]] = phi i64 [ [[TMP18]], %[[COND_TRUE24]] ], [ [[TMP19]], %[[COND_FALSE25]] ]
+// CHECK2-NEXT: store i64 [[COND27]], ptr [[DOTOMP_FUSE_MAX22]], align 8
+// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND:.*]]
+// CHECK2: [[FOR_COND]]:
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: [[CMP28:%.*]] = icmp slt i32 [[TMP20]], 128
+// CHECK2-NEXT: br i1 [[CMP28]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2: [[FOR_BODY]]:
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP21]])
+// CHECK2-NEXT: br label %[[FOR_INC:.*]]
+// CHECK2: [[FOR_INC]]:
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1
+// CHECK2-NEXT: store i32 [[INC]], ptr [[I]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
+// CHECK2: [[FOR_END]]:
+// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND30:.*]]
+// CHECK2: [[FOR_COND30]]:
+// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX22]], align 8
+// CHECK2-NEXT: [[CMP31:%.*]] = icmp slt i64 [[TMP23]], [[TMP24]]
+// CHECK2-NEXT: br i1 [[CMP31]], label %[[FOR_BODY32:.*]], label %[[FOR_END63:.*]]
+// CHECK2: [[FOR_BODY32]]:
+// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT: [[CMP33:%.*]] = icmp slt i64 [[TMP25]], [[TMP26]]
+// CHECK2-NEXT: br i1 [[CMP33]], label %[[IF_THEN:.*]], label %[[IF_END53:.*]]
+// CHECK2: [[IF_THEN]]:
+// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4
+// CHECK2-NEXT: [[CONV34:%.*]] = sext i32 [[TMP27]] to i64
+// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4
+// CHECK2-NEXT: [[CONV35:%.*]] = sext i32 [[TMP28]] to i64
+// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV35]], [[TMP29]]
+// CHECK2-NEXT: [[ADD36:%.*]] = add nsw i64 [[CONV34]], [[MUL]]
+// CHECK2-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32
+// CHECK2-NEXT: store i32 [[CONV37]], ptr [[DOTOMP_IV06]], align 4
+// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4
+// CHECK2-NEXT: [[MUL38:%.*]] = mul nsw i32 [[TMP30]], 1
+// CHECK2-NEXT: [[ADD39:%.*]] = add nsw i32 0, [[MUL38]]
+// CHECK2-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[CMP40:%.*]] = icmp slt i32 [[TMP31]], [[TMP32]]
+// CHECK2-NEXT: br i1 [[CMP40]], label %[[IF_THEN41:.*]], label %[[IF_END:.*]]
+// CHECK2: [[IF_THEN41]]:
+// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL42:%.*]] = mul nsw i32 [[TMP34]], [[TMP35]]
+// CHECK2-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP33]], [[MUL42]]
+// CHECK2-NEXT: store i32 [[ADD43]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[MUL44:%.*]] = mul nsw i32 [[TMP36]], 2
+// CHECK2-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]]
+// CHECK2-NEXT: store i32 [[ADD45]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP37]])
+// CHECK2-NEXT: br label %[[IF_END]]
+// CHECK2: [[IF_END]]:
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP46:%.*]] = icmp slt i32 [[TMP38]], [[TMP39]]
+// CHECK2-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
+// CHECK2: [[IF_THEN47]]:
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL48:%.*]] = mul nsw i32 [[TMP41]], [[TMP42]]
+// CHECK2-NEXT: [[ADD49:%.*]] = add nsw i32 [[TMP40]], [[MUL48]]
+// CHECK2-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[MUL50:%.*]] = mul nsw i32 [[TMP43]], 1
+// CHECK2-NEXT: [[ADD51:%.*]] = add nsw i32 0, [[MUL50]]
+// CHECK2-NEXT: store i32 [[ADD51]], ptr [[K]], align 4
+// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[K]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP44]])
+// CHECK2-NEXT: br label %[[IF_END52]]
+// CHECK2: [[IF_END52]]:
+// CHECK2-NEXT: br label %[[IF_END53]]
+// CHECK2: [[IF_END53]]:
+// CHECK2-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP45]], [[TMP46]]
+// CHECK2-NEXT: br i1 [[CMP54]], label %[[IF_THEN55:.*]], label %[[IF_END60:.*]]
+// CHECK2: [[IF_THEN55]]:
+// CHECK2-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8
+// CHECK2-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8
+// CHECK2-NEXT: [[TMP49:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: [[MUL56:%.*]] = mul nsw i64 [[TMP48]], [[TMP49]]
+// CHECK2-NEXT: [[ADD57:%.*]] = add nsw i64 [[TMP47]], [[MUL56]]
+// CHECK2-NEXT: store i64 [[ADD57]], ptr [[DOTOMP_IV120]], align 8
+// CHECK2-NEXT: [[TMP50:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT: [[TMP51:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8
+// CHECK2-NEXT: [[MUL58:%.*]] = mul nsw i64 [[TMP51]], 1
+// CHECK2-NEXT: [[ADD_PTR59:%.*]] = getelementptr inbounds double, ptr [[TMP50]], i64 [[MUL58]]
+// CHECK2-NEXT: store ptr [[ADD_PTR59]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[TMP52:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: store ptr [[TMP52]], ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[C]], align 4
+// CHECK2-NEXT: [[TMP54:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP55:%.*]] = load double, ptr [[TMP54]], align 8
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP53]], double noundef [[TMP55]])
+// CHECK2-NEXT: br label %[[IF_END60]]
+// CHECK2: [[IF_END60]]:
+// CHECK2-NEXT: br label %[[FOR_INC61:.*]]
+// CHECK2: [[FOR_INC61]]:
+// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: [[INC62:%.*]] = add nsw i64 [[TMP56]], 1
+// CHECK2-NEXT: store i64 [[INC62]], ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND30]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK2: [[FOR_END63]]:
+// CHECK2-NEXT: store i32 37, ptr [[CC]], align 4
+// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE264]], align 8
+// CHECK2-NEXT: [[TMP57:%.*]] = load ptr, ptr [[__RANGE264]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY66:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP57]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY66]], ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT: [[TMP58:%.*]] = load ptr, ptr [[__RANGE264]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY68:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP58]], i64 0, i64 0
+// CHECK2-NEXT: [[ADD_PTR69:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY68]], i64 256
+// CHECK2-NEXT: store ptr [[ADD_PTR69]], ptr [[__END267]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND70:.*]]
+// CHECK2: [[FOR_COND70]]:
+// CHECK2-NEXT: [[TMP59:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__END267]], align 8
+// CHECK2-NEXT: [[CMP71:%.*]] = icmp ne ptr [[TMP59]], [[TMP60]]
+// CHECK2-NEXT: br i1 [[CMP71]], label %[[FOR_BODY72:.*]], label %[[FOR_END74:.*]]
+// CHECK2: [[FOR_BODY72]]:
+// CHECK2-NEXT: [[TMP61:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT: store ptr [[TMP61]], ptr [[VV]], align 8
+// CHECK2-NEXT: [[TMP62:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK2-NEXT: [[TMP64:%.*]] = load double, ptr [[TMP63]], align 8
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP62]], double noundef [[TMP64]])
+// CHECK2-NEXT: br label %[[FOR_INC73:.*]]
+// CHECK2: [[FOR_INC73]]:
+// CHECK2-NEXT: [[TMP65:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP65]], i32 1
+// CHECK2-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND70]]
+// CHECK2: [[FOR_END74]]:
+// CHECK2-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @tfoo2(
+// CHECK2-SAME: ) #[[ATTR0]] {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: call void @_Z4foo2IiEvT_S0_S0_(i32 noundef 0, i32 noundef 64, i32 noundef 4)
+// CHECK2-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define linkonce_odr void @_Z4foo2IiEvT_S0_S0_(
+// CHECK2-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] comdat {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_17:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTNEW_STEP21:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP0]], ptr [[I]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]]
+// CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]]
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
+// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
+// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1
+// CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP9]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]]
+// CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]]
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]]
+// CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1
+// CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1
+// CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
+// CHECK2-NEXT: store i32 [[ADD16]], ptr [[K]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK2-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP24]], ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK2-NEXT: [[SUB23:%.*]] = sub i32 [[TMP25]], [[TMP26]]
+// CHECK2-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1
+// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP27]]
+// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP28]]
+// CHECK2-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1
+// CHECK2-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4
+// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK2-NEXT: [[ADD28:%.*]] = add i32 [[TMP29]], 1
+// CHECK2-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 [[TMP30]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP31]], [[TMP32]]
+// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2: [[COND_TRUE]]:
+// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: br label %[[COND_END:.*]]
+// CHECK2: [[COND_FALSE]]:
+// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: br label %[[COND_END]]
+// CHECK2: [[COND_END]]:
+// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP33]], %[[COND_TRUE]] ], [ [[TMP34]], %[[COND_FALSE]] ]
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP35]], [[TMP36]]
+// CHECK2-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]]
+// CHECK2: [[COND_TRUE30]]:
+// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK2-NEXT: br label %[[COND_END32:.*]]
+// CHECK2: [[COND_FALSE31]]:
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT: br label %[[COND_END32]]
+// CHECK2: [[COND_END32]]:
+// CHECK2-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP37]], %[[COND_TRUE30]] ], [ [[TMP38]], %[[COND_FALSE31]] ]
+// CHECK2-NEXT: store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND:.*]]
+// CHECK2: [[FOR_COND]]:
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP39]], [[TMP40]]
+// CHECK2-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2: [[FOR_BODY]]:
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP41]], [[TMP42]]
+// CHECK2-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK2: [[IF_THEN]]:
+// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP44]], [[TMP45]]
+// CHECK2-NEXT: [[ADD36:%.*]] = add i32 [[TMP43]], [[MUL]]
+// CHECK2-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[MUL37:%.*]] = mul i32 [[TMP47]], [[TMP48]]
+// CHECK2-NEXT: [[ADD38:%.*]] = add i32 [[TMP46]], [[MUL37]]
+// CHECK2-NEXT: store i32 [[ADD38]], ptr [[I]], align 4
+// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP49]])
+// CHECK2-NEXT: br label %[[IF_END]]
+// CHECK2: [[IF_END]]:
+// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP50]], [[TMP51]]
+// CHECK2-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]]
+// CHECK2: [[IF_THEN40]]:
+// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL41:%.*]] = mul i32 [[TMP53]], [[TMP54]]
+// CHECK2-NEXT: [[ADD42:%.*]] = add i32 [[TMP52]], [[MUL41]]
+// CHECK2-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[MUL43:%.*]] = mul i32 [[TMP56]], [[TMP57]]
+// CHECK2-NEXT: [[SUB44:%.*]] = sub i32 [[TMP55]], [[MUL43]]
+// CHECK2-NEXT: store i32 [[SUB44]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP58:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP58]])
+// CHECK2-NEXT: br label %[[IF_END45]]
+// CHECK2: [[IF_END45]]:
+// CHECK2-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP59]], [[TMP60]]
+// CHECK2-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
+// CHECK2: [[IF_THEN47]]:
+// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4
+// CHECK2-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4
+// CHECK2-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL48:%.*]] = mul i32 [[TMP62]], [[TMP63]]
+// CHECK2-NEXT: [[ADD49:%.*]] = add i32 [[TMP61]], [[MUL48]]
+// CHECK2-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4
+// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK2-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4
+// CHECK2-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT: [[MUL50:%.*]] = mul i32 [[TMP65]], [[TMP66]]
+// CHECK2-NEXT: [[ADD51:%.*]] = add i32 [[TMP64]], [[MUL50]]
+// CHECK2-NEXT: store i32 [[ADD51]], ptr [[K]], align 4
+// CHECK2-NEXT: [[TMP67:%.*]] = load i32, ptr [[K]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP67]])
+// CHECK2-NEXT: br label %[[IF_END52]]
+// CHECK2: [[IF_END52]]:
+// CHECK2-NEXT: br label %[[FOR_INC:.*]]
+// CHECK2: [[FOR_INC]]:
+// CHECK2-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP68]], 1
+// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK2: [[FOR_END]]:
+// CHECK2-NEXT: ret void
+//
+//.
+// CHECK1: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]}
+// CHECK1: [[META4]] = !{!"llvm.loop.mustprogress"}
+// CHECK1: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]}
+// CHECK1: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]}
+// CHECK1: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]}
+// CHECK1: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]}
+// CHECK1: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]}
+// CHECK1: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]}
+//.
+// CHECK2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]}
+// CHECK2: [[META4]] = !{!"llvm.loop.mustprogress"}
+// CHECK2: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]}
+// CHECK2: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]}
+// CHECK2: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]}
+// CHECK2: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]}
+// CHECK2: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]}
+// CHECK2: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]}
+//.
diff --git a/clang/test/OpenMP/fuse_messages.cpp b/clang/test/OpenMP/fuse_messages.cpp
new file mode 100644
index 0000000..b86ce95
--- /dev/null
+++ b/clang/test/OpenMP/fuse_messages.cpp
@@ -0,0 +1,209 @@
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++20 -fopenmp -fopenmp-version=60 -fsyntax-only -Wuninitialized -verify %s
+
+void func() {
+
+ // expected-error@+2 {{statement after '#pragma omp fuse' must be a loop sequence containing canonical loops or loop-generating constructs}}
+ #pragma omp fuse
+ ;
+
+ // expected-error@+2 {{statement after '#pragma omp fuse' must be a for loop}}
+ #pragma omp fuse
+ {int bar = 0;}
+
+ // expected-error@+4 {{statement after '#pragma omp fuse' must be a for loop}}
+ #pragma omp fuse
+ {
+ for(int i = 0; i < 10; ++i);
+ int x = 2;
+ }
+
+ // expected-error@+2 {{statement after '#pragma omp fuse' must be a loop sequence containing canonical loops or loop-generating constructs}}
+ #pragma omp fuse
+ #pragma omp for
+ for (int i = 0; i < 7; ++i)
+ ;
+
+ {
+ // expected-error@+2 {{expected statement}}
+ #pragma omp fuse
+ }
+
+ // expected-warning@+1 {{extra tokens at the end of '#pragma omp fuse' are ignored}}
+ #pragma omp fuse foo
+ {
+ for (int i = 0; i < 7; ++i)
+ ;
+ for(int j = 0; j < 100; ++j);
+
+ }
+
+
+ // expected-error@+1 {{unexpected OpenMP clause 'final' in directive '#pragma omp fuse'}}
+ #pragma omp fuse final(0)
+ {
+ for (int i = 0; i < 7; ++i)
+ ;
+ for(int j = 0; j < 100; ++j);
+
+ }
+
+ //expected-error@+3 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'i'}}
+ #pragma omp fuse
+ {
+ for(int i = 0; i < 10; i*=2) {
+ ;
+ }
+ for(int j = 0; j < 100; ++j);
+ }
+
+ //expected-error@+2 {{loop sequence after '#pragma omp fuse' must contain at least 1 canonical loop or loop-generating construct}}
+ #pragma omp fuse
+ {}
+
+ //expected-error@+3 {{statement after '#pragma omp fuse' must be a for loop}}
+ #pragma omp fuse
+ {
+ #pragma omp unroll full
+ for(int i = 0; i < 10; ++i);
+
+ for(int j = 0; j < 10; ++j);
+ }
+
+ //expected-warning@+2 {{looprange clause selects a single loop, resulting in redundant fusion}}
+ #pragma omp fuse
+ {
+ for(int i = 0; i < 10; ++i);
+ }
+
+ //expected-warning@+1 {{looprange clause selects a single loop, resulting in redundant fusion}}
+ #pragma omp fuse looprange(1, 1)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ }
+
+ //expected-error@+1 {{argument to 'looprange' clause must be a strictly positive integer value}}
+ #pragma omp fuse looprange(1, -1)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ }
+
+ //expected-error@+1 {{argument to 'looprange' clause must be a strictly positive integer value}}
+ #pragma omp fuse looprange(1, 0)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ }
+
+ const int x = 1;
+ constexpr int y = 4;
+ //expected-error@+1 {{looprange clause selects loops from 1 to 4 but this exceeds the number of loops (3) in the loop sequence}}
+ #pragma omp fuse looprange(x,y)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ for(int k = 0; k < 50; ++k);
+ }
+
+ //expected-error@+1 {{looprange clause selects loops from 1 to 420 but this exceeds the number of loops (3) in the loop sequence}}
+ #pragma omp fuse looprange(1,420)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ for(int k = 0; k < 50; ++k);
+ }
+
+ //expected-error@+1 {{looprange clause selects loops from 1 to 6 but this exceeds the number of loops (5) in the loop sequence}}
+ #pragma omp fuse looprange(1,6)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ for(int k = 0; k < 50; ++k);
+ // This fusion results in 2 loops
+ #pragma omp fuse looprange(1,2)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ for(int k = 0; k < 50; ++k);
+ }
+ }
+
+ //expected-error@+1 {{looprange clause selects loops from 2 to 4 but this exceeds the number of loops (3) in the loop sequence}}
+ #pragma omp fuse looprange(2,3)
+ {
+ #pragma omp unroll partial(2)
+ for(int i = 0; i < 10; ++i);
+
+ #pragma omp reverse
+ for(int j = 0; j < 10; ++j);
+
+ #pragma omp fuse
+ {
+ {
+ #pragma omp reverse
+ for(int j = 0; j < 10; ++j);
+ }
+ for(int k = 0; k < 50; ++k);
+ }
+ }
+}
+
+// In a template context, but the expression itself is not instantiation-dependent
+template <typename T>
+static void templated_func() {
+
+ //expected-warning@+1 {{looprange clause selects a single loop, resulting in redundant fusion}}
+ #pragma omp fuse looprange(2,1)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ for(int k = 0; k < 50; ++k);
+ }
+
+ //expected-error@+1 {{looprange clause selects loops from 3 to 5 but this exceeds the number of loops (3) in the loop sequence}}
+ #pragma omp fuse looprange(3,3)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ for(int k = 0; k < 50; ++k);
+ }
+
+}
+
+template <int V>
+static void templated_func_value_dependent() {
+
+ //expected-warning@+1 {{looprange clause selects a single loop, resulting in redundant fusion}}
+ #pragma omp fuse looprange(V,1)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ for(int k = 0; k < 50; ++k);
+ }
+}
+
+template <typename T>
+static void templated_func_type_dependent() {
+ constexpr T s = 1;
+
+ //expected-error@+1 {{argument to 'looprange' clause must be a strictly positive integer value}}
+ #pragma omp fuse looprange(s,s-1)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ for(int k = 0; k < 50; ++k);
+ }
+}
+
+
+void template_inst() {
+ // expected-note@+1 {{in instantiation of function template specialization 'templated_func<int>' requested here}}
+ templated_func<int>();
+ // expected-note@+1 {{in instantiation of function template specialization 'templated_func_value_dependent<1>' requested here}}
+ templated_func_value_dependent<1>();
+ // expected-note@+1 {{in instantiation of function template specialization 'templated_func_type_dependent<int>' requested here}}
+ templated_func_type_dependent<int>();
+}
+
+
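For contrast with the rejected cases in the new test above, a minimal well-formed use of the directive (an illustrative sketch, not part of this patch; the function name fuse_ok is hypothetical) fuses only the first two loops of a three-loop sequence via the looprange clause, mirroring the valid nested case exercised by the test:

void fuse_ok() {
  // Fuse loops 1 and 2 of the sequence; the k-loop keeps its own iteration space.
  #pragma omp fuse looprange(1, 2)
  {
    for (int i = 0; i < 10; ++i)
      ;
    for (int j = 0; j < 100; ++j)
      ;
    for (int k = 0; k < 50; ++k)
      ;
  }
}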
diff --git a/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp b/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp
index 7ffb7aae..8c7a778 100644
--- a/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp
+++ b/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp
@@ -1,9 +1,7 @@
-// RUN: %clang_cc1 -std=c++20 %s -verify=cxx20
-// RUN: %clang_cc1 -std=c++23 %s -verify=cxx23
-// RUN: %clang_cc1 -std=c++23 -Wpre-c++23-compat %s -verify=precxx23
-// RUN: %clang_cc1 -std=c++23 -pedantic %s -verify=cxx23
-
-//cxx23-no-diagnostics
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++20 %s -verify=cxx20
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++23 %s -verify=cxx23
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++23 -Wpre-c++23-compat %s -verify=precxx23
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++23 -pedantic %s -verify=cxx23
auto L1 = [] constexpr {};
// cxx20-warning@-1 {{lambda without a parameter clause is a C++23 extension}}
@@ -14,3 +12,25 @@ auto L3 = [] static {};
// cxx20-warning@-1 {{lambda without a parameter clause is a C++23 extension}}
// cxx20-warning@-2 {{static lambdas are a C++23 extension}}
// precxx23-warning@-3 {{static lambdas are incompatible with C++ standards before C++23}}
+
+namespace GH161070 {
+void t1() { int a = [] __arm_streaming; }
+// precxx23-error@-1 {{'__arm_streaming' cannot be applied to a declaration}}
+// precxx23-error@-2 {{expected body of lambda expression}}
+// cxx23-error@-3 {{'__arm_streaming' cannot be applied to a declaration}}
+// cxx23-error@-4 {{expected body of lambda expression}}
+// cxx20-error@-5 {{'__arm_streaming' cannot be applied to a declaration}}
+// cxx20-error@-6 {{expected body of lambda expression}}
+// cxx20-warning@-7 {{'__arm_streaming' in this position is a C++23 extension}}
+// precxx23-warning@-8 {{'__arm_streaming' in this position is incompatible with C++ standards before C++23}}
+
+void t2() { int a = [] [[assume(true)]]; }
+// precxx23-error@-1 {{'assume' attribute cannot be applied to a declaration}}
+// precxx23-error@-2 {{expected body of lambda expression}}
+// cxx23-error@-3 {{'assume' attribute cannot be applied to a declaration}}
+// cxx23-error@-4 {{expected body of lambda expression}}
+// cxx20-error@-5 {{'assume' attribute cannot be applied to a declaration}}
+// cxx20-error@-6 {{expected body of lambda expression}}
+// cxx20-warning@-7 {{an attribute specifier sequence in this position is a C++23 extension}}
+// precxx23-warning@-8 {{an attribute specifier sequence in this position is incompatible with C++ standards before C++23}}
+}
diff --git a/clang/test/SemaCUDA/vararg.cu b/clang/test/SemaCUDA/vararg.cu
index 34ef367..0238f42 100644
--- a/clang/test/SemaCUDA/vararg.cu
+++ b/clang/test/SemaCUDA/vararg.cu
@@ -10,7 +10,7 @@
#include <stdarg.h>
#include "Inputs/cuda.h"
-__device__ void foo() {
+__global__ void foo() {
va_list list;
va_arg(list, int);
#ifdef EXPECT_VA_ARG_ERR
diff --git a/clang/test/SemaCXX/amdgpu-image-rsrc.cpp b/clang/test/SemaCXX/amdgpu-image-rsrc.cpp
new file mode 100644
index 0000000..61a82d4
--- /dev/null
+++ b/clang/test/SemaCXX/amdgpu-image-rsrc.cpp
@@ -0,0 +1,17 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -fsyntax-only -verify -std=gnu++11 -triple amdgcn -Wno-unused-value %s
+
+void foo() {
+ int n = 100;
+ __amdgpu_texture_t v = 0; // expected-error {{cannot initialize a variable of type '__amdgpu_texture_t' with an rvalue of type 'int'}}
+ static_cast<__amdgpu_texture_t>(n); // expected-error {{static_cast from 'int' to '__amdgpu_texture_t' is not allowed}}
+ reinterpret_cast<__amdgpu_texture_t>(n); // expected-error {{reinterpret_cast from 'int' to '__amdgpu_texture_t' is not allowed}}
+ (void)(v + v); // expected-error {{invalid operands to binary expression ('__amdgpu_texture_t' and '__amdgpu_texture_t')}}
+ int x(v); // expected-error {{cannot initialize a variable of type 'int' with an lvalue of type '__amdgpu_texture_t'}}
+ __amdgpu_texture_t k;
+}
+
+template<class T> void bar(T);
+void use(__amdgpu_texture_t r) { bar(r); }
+struct S { __amdgpu_texture_t r; int a; };
diff --git a/clang/test/SemaCXX/bitfield-layout.cpp b/clang/test/SemaCXX/bitfield-layout.cpp
index 7efd1d3..f30218b 100644
--- a/clang/test/SemaCXX/bitfield-layout.cpp
+++ b/clang/test/SemaCXX/bitfield-layout.cpp
@@ -35,7 +35,7 @@ CHECK_SIZE(Test4, 8);
CHECK_ALIGN(Test4, 8);
struct Test5 {
- char c : 0x100000001; // expected-warning {{width of bit-field 'c' (4294967297 bits) exceeds the width of its type; value will be truncated to 8 bits}}
+ char c : 0x100000001; // expected-warning {{width of bit-field 'c' (4'294'967'297 bits) exceeds the width of its type; value will be truncated to 8 bits}}
};
// Size and align don't really matter here, just make sure we don't crash.
CHECK_SIZE(Test5, 1);
diff --git a/clang/test/SemaCXX/decltype.cpp b/clang/test/SemaCXX/decltype.cpp
index 739485b..45a4c4c 100644
--- a/clang/test/SemaCXX/decltype.cpp
+++ b/clang/test/SemaCXX/decltype.cpp
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -Wno-c99-designator %s
+// RUN: %clang_cc1 -std=c++17 -fsyntax-only -verify -Wno-c99-designator %s
// PR5290
int const f0();
@@ -156,6 +157,8 @@ struct A {
}
};
+
+
// This shouldn't crash.
static_assert(A<int>().f<int>() == 0, "");
// The result should not be dependent.
@@ -163,6 +166,81 @@ static_assert(A<int>().f<int>() != 0, ""); // expected-error {{static assertion
// expected-note@-1 {{expression evaluates to '0 != 0'}}
}
+
+#if __cplusplus >= 201703L
+namespace GH160497 {
+
+template <class> struct S {
+ template <class>
+ inline static auto mem =
+ [] { static_assert(false); // expected-error {{static assertion failed}} \
+ // expected-note {{while substituting into a lambda expression here}}
+ return 42;
+ }();
+};
+
+using T = decltype(S<void>::mem<void>);
+ // expected-note@-1 {{in instantiation of static data member 'GH160497::S<void>::mem<void>' requested here}}
+
+
+template <class> struct S2 {
+ template <class>
+ inline static auto* mem =
+ [] { static_assert(false); // expected-error {{static assertion failed}} \
+ // expected-note {{while substituting into a lambda expression here}}
+ return static_cast<int*>(nullptr);
+ }();
+};
+
+using T2 = decltype(S2<void>::mem<void>);
+//expected-note@-1 {{in instantiation of static data member 'GH160497::S2<void>::mem<void>' requested here}}
+
+template <class> struct S3 {
+ template <class>
+ inline static int mem = // Check we don't instantiate when the type is not deduced.
+ [] { static_assert(false);
+ return 42;
+ }();
+};
+
+using T = decltype(S3<void>::mem<void>);
+}
+
+namespace N1 {
+
+template<class>
+struct S {
+ template<class>
+ inline static auto mem = 42;
+};
+
+using T = decltype(S<void>::mem<void>);
+
+T y = 42;
+
+}
+
+namespace GH161196 {
+
+template <typename> struct A {
+ static constexpr int digits = 0;
+};
+
+template <typename> struct B {
+ template <int, typename MaskInt = int, int = A<MaskInt>::digits>
+ static constexpr auto XBitMask = 0;
+};
+
+struct C {
+ using ReferenceHost = B<int>;
+ template <int> static decltype(ReferenceHost::XBitMask<0>) XBitMask;
+};
+
+void test() { (void)C::XBitMask<0>; }
+
+}
+#endif
+
template<typename>
class conditional {
};
diff --git a/clang/test/SemaCXX/invalid-requirement-requires-expr.cpp b/clang/test/SemaCXX/invalid-requirement-requires-expr.cpp
index 097ada3..436dfb9 100644
--- a/clang/test/SemaCXX/invalid-requirement-requires-expr.cpp
+++ b/clang/test/SemaCXX/invalid-requirement-requires-expr.cpp
@@ -17,8 +17,7 @@ constexpr bool A<x>::far() {
b.data_member;
requires A<x-1>::far(); // #Invalid
// expected-error@#Invalid {{recursive template instantiation exceeded maximum depth}}
- // expected-note@#Invalid {{in instantiation}}
- // expected-note@#Invalid 2 {{while}}
+ // expected-note@#Invalid 3 {{while}}
// expected-note@#Invalid {{contexts in backtrace}}
// expected-note@#Invalid {{increase recursive template instantiation depth}}
};
diff --git a/clang/test/SemaCXX/type-traits.cpp b/clang/test/SemaCXX/type-traits.cpp
index 3f01247..d49330f 100644
--- a/clang/test/SemaCXX/type-traits.cpp
+++ b/clang/test/SemaCXX/type-traits.cpp
@@ -2038,6 +2038,49 @@ void is_implicit_lifetime(int n) {
static_assert(__builtin_is_implicit_lifetime(int * __restrict));
}
+namespace GH160610 {
+class NonAggregate {
+public:
+ NonAggregate() = default;
+
+ NonAggregate(const NonAggregate&) = delete;
+ NonAggregate& operator=(const NonAggregate&) = delete;
+private:
+ int num;
+};
+
+class DataMemberInitializer {
+public:
+ DataMemberInitializer() = default;
+
+ DataMemberInitializer(const DataMemberInitializer&) = delete;
+ DataMemberInitializer& operator=(const DataMemberInitializer&) = delete;
+private:
+ int num = 0;
+};
+
+class UserProvidedConstructor {
+public:
+ UserProvidedConstructor() {}
+
+ UserProvidedConstructor(const UserProvidedConstructor&) = delete;
+ UserProvidedConstructor& operator=(const UserProvidedConstructor&) = delete;
+};
+
+static_assert(__builtin_is_implicit_lifetime(NonAggregate));
+static_assert(!__builtin_is_implicit_lifetime(DataMemberInitializer));
+static_assert(!__builtin_is_implicit_lifetime(UserProvidedConstructor));
+
+#if __cplusplus >= 202002L
+template <typename T>
+class Tpl {
+ Tpl() requires false = default;
+};
+static_assert(!__builtin_is_implicit_lifetime(Tpl<int>));
+
+#endif
+}
+
void is_signed()
{
//static_assert(__is_signed(char));
diff --git a/clang/test/SemaOpenCL/amdgpu-image-rsrc.cl b/clang/test/SemaOpenCL/amdgpu-image-rsrc.cl
new file mode 100644
index 0000000..dc56494
--- /dev/null
+++ b/clang/test/SemaOpenCL/amdgpu-image-rsrc.cl
@@ -0,0 +1,13 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -verify -cl-std=CL1.2 -triple amdgcn-amd-amdhsa %s
+// RUN: %clang_cc1 -verify -cl-std=CL2.0 -triple amdgcn-amd-amdhsa %s
+
+void f() {
+ int n = 3;
+ __amdgpu_texture_t v = (__amdgpu_texture_t)0; // expected-error {{used type '__amdgpu_texture_t' where arithmetic or pointer type is required}}
+ int k = v; // expected-error {{initializing '__private int' with an expression of incompatible type '__private __amdgpu_texture_t'}}
+ (void)(v + v); // expected-error {{invalid operands}}
+ __amdgpu_texture_t r;
+ int *p = (int*)r; // expected-error {{operand of type '__amdgpu_texture_t' where arithmetic or pointer type is required}}
+}
diff --git a/clang/test/SemaOpenMP/amdgpu-image-rsrc.cpp b/clang/test/SemaOpenMP/amdgpu-image-rsrc.cpp
new file mode 100644
index 0000000..51b3f72
--- /dev/null
+++ b/clang/test/SemaOpenMP/amdgpu-image-rsrc.cpp
@@ -0,0 +1,12 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -triple amdgcn-amd-amdhsa -fopenmp-is-target-device -Wno-unused-value %s
+
+void foo() {
+#pragma omp target
+ {
+ int n = 5;
+ __amdgpu_texture_t v = 0; // expected-error {{cannot initialize a variable of type '__amdgpu_texture_t' with an rvalue of type 'int'}}
+ (void)(v + v); // expected-error {{invalid operands to binary expression}}
+ }
+}
diff --git a/clang/test/SemaTemplate/instantiation-depth-subst-2.cpp b/clang/test/SemaTemplate/instantiation-depth-subst-2.cpp
index 2b519e9..66fd1af 100644
--- a/clang/test/SemaTemplate/instantiation-depth-subst-2.cpp
+++ b/clang/test/SemaTemplate/instantiation-depth-subst-2.cpp
@@ -2,5 +2,6 @@
template<int N> struct S { };
template<typename T> S<T() + T()> operator+(T, T); // expected-error {{instantiation exceeded maximum depth}} expected-note 2{{while substituting}}
+// expected-note@-1 {{use -ftemplate-depth=N to increase recursive template instantiation depth}}
S<0> s;
int k = s + s; // expected-note {{while substituting}}
diff --git a/clang/test/SemaTemplate/instantiation-depth-subst.cpp b/clang/test/SemaTemplate/instantiation-depth-subst.cpp
index 062a8ed..17944bc 100644
--- a/clang/test/SemaTemplate/instantiation-depth-subst.cpp
+++ b/clang/test/SemaTemplate/instantiation-depth-subst.cpp
@@ -3,7 +3,8 @@
// PR9793
template<typename T> auto f(T t) -> decltype(f(t)); // \
// expected-error {{recursive template instantiation exceeded maximum depth of 2}} \
-// expected-note 2 {{while substituting}}
+// expected-note 2 {{while substituting}} \
+// expected-note {{use -ftemplate-depth=N to increase recursive template instantiation depth}}
struct S {};
int k = f(S{}); // expected-note {{while substituting}}
diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index 0e2758d..e41f4eb 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -420,7 +420,7 @@ public:
std::vector<ModuleDeps *> NewMDs;
{
std::unique_lock<std::mutex> ul(Lock);
- for (const ModuleDeps &MD : Graph) {
+ for (ModuleDeps &MD : Graph) {
auto I = Modules.find({MD.ID, 0});
if (I != Modules.end()) {
I->first.InputIndex = std::min(I->first.InputIndex, InputIndex);
diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index 5aab743..30e2be7 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -2148,6 +2148,9 @@ public:
void VisitOMPUnrollDirective(const OMPUnrollDirective *D);
void VisitOMPReverseDirective(const OMPReverseDirective *D);
void VisitOMPInterchangeDirective(const OMPInterchangeDirective *D);
+ void VisitOMPCanonicalLoopSequenceTransformationDirective(
+ const OMPCanonicalLoopSequenceTransformationDirective *D);
+ void VisitOMPFuseDirective(const OMPFuseDirective *D);
void VisitOMPForDirective(const OMPForDirective *D);
void VisitOMPForSimdDirective(const OMPForSimdDirective *D);
void VisitOMPSectionsDirective(const OMPSectionsDirective *D);
@@ -2353,6 +2356,11 @@ void OMPClauseEnqueue::VisitOMPPartialClause(const OMPPartialClause *C) {
Visitor->AddStmt(C->getFactor());
}
+void OMPClauseEnqueue::VisitOMPLoopRangeClause(const OMPLoopRangeClause *C) {
+ Visitor->AddStmt(C->getFirst());
+ Visitor->AddStmt(C->getCount());
+}
+
void OMPClauseEnqueue::VisitOMPAllocatorClause(const OMPAllocatorClause *C) {
Visitor->AddStmt(C->getAllocator());
}
@@ -3317,6 +3325,15 @@ void EnqueueVisitor::VisitOMPInterchangeDirective(
VisitOMPCanonicalLoopNestTransformationDirective(D);
}
+void EnqueueVisitor::VisitOMPCanonicalLoopSequenceTransformationDirective(
+ const OMPCanonicalLoopSequenceTransformationDirective *D) {
+ VisitOMPExecutableDirective(D);
+}
+
+void EnqueueVisitor::VisitOMPFuseDirective(const OMPFuseDirective *D) {
+ VisitOMPCanonicalLoopSequenceTransformationDirective(D);
+}
+
void EnqueueVisitor::VisitOMPForDirective(const OMPForDirective *D) {
VisitOMPLoopDirective(D);
}
@@ -6275,6 +6292,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) {
return cxstring::createRef("OMPReverseDirective");
case CXCursor_OMPInterchangeDirective:
return cxstring::createRef("OMPInterchangeDirective");
+ case CXCursor_OMPFuseDirective:
+ return cxstring::createRef("OMPFuseDirective");
case CXCursor_OMPForDirective:
return cxstring::createRef("OMPForDirective");
case CXCursor_OMPForSimdDirective:
diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp
index 3c40624..56f113c 100644
--- a/clang/tools/libclang/CXCursor.cpp
+++ b/clang/tools/libclang/CXCursor.cpp
@@ -687,6 +687,9 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent,
case Stmt::OMPInterchangeDirectiveClass:
K = CXCursor_OMPInterchangeDirective;
break;
+ case Stmt::OMPFuseDirectiveClass:
+ K = CXCursor_OMPFuseDirective;
+ break;
case Stmt::OMPForDirectiveClass:
K = CXCursor_OMPForDirective;
break;
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 6a3385a..fef7036 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -1364,6 +1364,27 @@ TEST_F(FormatTest, FormatIfWithoutCompoundStatementButElseWith) {
AllowsMergedIf);
}
+TEST_F(FormatTest, WrapMultipleStatementIfAndElseBraces) {
+ auto Style = getLLVMStyle();
+ Style.AllowShortBlocksOnASingleLine = FormatStyle::SBS_Always;
+ Style.AllowShortIfStatementsOnASingleLine = FormatStyle::SIS_AllIfsAndElse;
+ Style.BreakBeforeBraces = FormatStyle::BS_Custom;
+ Style.BraceWrapping.AfterControlStatement = FormatStyle::BWACS_Always;
+ Style.BraceWrapping.BeforeElse = true;
+
+ verifyFormat("if (x)\n"
+ "{\n"
+ " ++x;\n"
+ " --y;\n"
+ "}\n"
+ "else\n"
+ "{\n"
+ " --x;\n"
+ " ++y;\n"
+ "}",
+ Style);
+}
+
TEST_F(FormatTest, FormatLoopsWithoutCompoundStatement) {
verifyFormat("while (true)\n"
" ;");
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 899cc47..4a8f27f 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -2237,6 +2237,12 @@ TEST_F(TokenAnnotatorTest, UnderstandsLambdas) {
ASSERT_EQ(Tokens.size(), 21u) << Tokens;
EXPECT_TOKEN(Tokens[11], tok::l_square, TT_LambdaLSquare);
EXPECT_TOKEN(Tokens[13], tok::l_brace, TT_LambdaLBrace);
+
+ Tokens = annotate("SomeFunction({[]() -> int *[] { return {}; }});");
+ ASSERT_EQ(Tokens.size(), 22u) << Tokens;
+ EXPECT_TOKEN(Tokens[3], tok::l_square, TT_LambdaLSquare);
+ EXPECT_TOKEN(Tokens[5], tok::l_paren, TT_LambdaDefinitionLParen);
+ EXPECT_TOKEN(Tokens[10], tok::l_square, TT_ArraySubscriptLSquare);
}
TEST_F(TokenAnnotatorTest, UnderstandsFunctionAnnotations) {
@@ -4159,6 +4165,14 @@ TEST_F(TokenAnnotatorTest, LineCommentTrailingBackslash) {
EXPECT_TOKEN(Tokens[1], tok::comment, TT_LineComment);
}
+TEST_F(TokenAnnotatorTest, ArrowAfterSubscript) {
+ auto Tokens =
+ annotate("return (getStructType()->getElements())[eIdx]->getName();");
+ ASSERT_EQ(Tokens.size(), 19u) << Tokens;
+ // Not TT_LambdaArrow.
+ EXPECT_TOKEN(Tokens[13], tok::arrow, TT_Unknown);
+}
+
TEST_F(TokenAnnotatorTest, QtProperty) {
auto Style = getLLVMStyle();
Style.AllowBreakBeforeQtProperty = true;